// Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. // Ported from: https://github.com/SixLabors/Fonts/ using System; using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Threading; using Avalonia.Utilities; namespace Avalonia.Media.TextFormatting.Unicode { /// /// Implementation of Unicode bidirectional algorithm (UAX #9) /// https://unicode.org/reports/tr9/ /// /// /// /// The Bidi algorithm uses a number of memory arrays for resolved /// types, level information, bracket types, x9 removal maps and /// more... /// /// /// This implementation of the BiDi algorithm has been designed /// to reduce memory pressure on the GC by re-using the same /// work buffers, so instances of this class should be re-used /// as much as possible. /// /// internal sealed class BidiAlgorithm { /// /// The original BiDiClass classes as provided by the caller /// private ArraySlice _originalClasses; /// /// Paired bracket types as provided by caller /// private ArraySlice _pairedBracketTypes; /// /// Paired bracket values as provided by caller /// private ArraySlice _pairedBracketValues; /// /// Try if the incoming data is known to contain brackets /// private bool _hasBrackets; /// /// True if the incoming data is known to contain embedding runs /// private bool _hasEmbeddings; /// /// True if the incoming data is known to contain isolating runs /// private bool _hasIsolates; /// /// Two directional mapping of isolate start/end pairs /// /// /// The forward mapping maps the start index to the end index. /// The reverse mapping maps the end index to the start index. /// private readonly BidiDictionary _isolatePairs = new BidiDictionary(); /// /// The working BiDi classes /// private ArraySlice _workingClasses; /// /// The working classes buffer /// private ArrayBuilder _workingClassesBuffer; /// /// A slice of the resolved levels /// private ArraySlice _resolvedLevels; /// /// The buffer underlying resolvedLevels /// private ArrayBuilder _resolvedLevelsBuffer; /// /// The resolve paragraph embedding level /// private sbyte _paragraphEmbeddingLevel; /// /// The status stack used during resolution of explicit /// embedding and isolating runs /// private readonly Stack _statusStack = new Stack(); /// /// Mapping used to virtually remove characters for rule X9 /// private ArrayBuilder _x9Map; /// /// Re-usable list of level runs /// private readonly List _levelRuns = new List(); /// /// Mapping for the current isolating sequence, built /// by joining level runs from the x9 map. /// private ArrayBuilder _isolatedRunMapping; /// /// A stack of pending isolate openings used by FindIsolatePairs() /// private readonly Stack _pendingIsolateOpenings = new Stack(); /// /// The level of the isolating run currently being processed /// private int _runLevel; /// /// The direction of the isolating run currently being processed /// private BidiClass _runDirection; /// /// The length of the isolating run currently being processed /// private int _runLength; /// /// A mapped slice of the resolved types for the isolating run currently /// being processed /// private MappedArraySlice _runResolvedClasses; /// /// A mapped slice of the original types for the isolating run currently /// being processed /// private MappedArraySlice _runOriginalClasses; /// /// A mapped slice of the run levels for the isolating run currently /// being processed /// private MappedArraySlice _runLevels; /// /// A mapped slice of the paired bracket types of the isolating /// run currently being processed /// private MappedArraySlice _runBiDiPairedBracketTypes; /// /// A mapped slice of the paired bracket values of the isolating /// run currently being processed /// private MappedArraySlice _runPairedBracketValues; /// /// Maximum pairing depth for paired brackets /// private const int MaxPairedBracketDepth = 63; /// /// Reusable list of pending opening brackets used by the /// LocatePairedBrackets method /// private readonly List _pendingOpeningBrackets = new List(); /// /// Resolved list of paired brackets /// private readonly List _pairedBrackets = new List(); /// /// Initializes a new instance of the class. /// internal BidiAlgorithm() { } /// /// Gets the resolved levels. /// public ArraySlice ResolvedLevels => _resolvedLevels; /// /// Gets the resolved paragraph embedding level /// public int ResolvedParagraphEmbeddingLevel => _paragraphEmbeddingLevel; /// /// Process data from a BiDiData instance /// /// The BiDi Unicode data. public void Process(BidiData data) => Process( data.Classes, data.PairedBracketTypes, data.PairedBracketValues, data.ParagraphEmbeddingLevel, data.HasBrackets, data.HasEmbeddings, data.HasIsolates, null); /// /// Processes Bidi Data /// public void Process( ArraySlice types, ArraySlice pairedBracketTypes, ArraySlice pairedBracketValues, sbyte paragraphEmbeddingLevel, bool? hasBrackets, bool? hasEmbeddings, bool? hasIsolates, ArraySlice? outLevels) { // Reset state _isolatePairs.Clear(); _workingClassesBuffer.Clear(); _levelRuns.Clear(); _resolvedLevelsBuffer.Clear(); if (types.IsEmpty) { return; } // Setup original types and working types _originalClasses = types; _workingClasses = _workingClassesBuffer.Add(types); // Capture paired bracket values and types _pairedBracketTypes = pairedBracketTypes; _pairedBracketValues = pairedBracketValues; // Store things we know _hasBrackets = hasBrackets ?? _pairedBracketTypes.Length == _originalClasses.Length; _hasEmbeddings = hasEmbeddings ?? true; _hasIsolates = hasIsolates ?? true; // Find all isolate pairs FindIsolatePairs(); // Resolve the paragraph embedding level if (paragraphEmbeddingLevel == 2) { _paragraphEmbeddingLevel = ResolveEmbeddingLevel(_originalClasses); } else { _paragraphEmbeddingLevel = paragraphEmbeddingLevel; } // Create resolved levels buffer if (outLevels.HasValue) { if (outLevels.Value.Length != _originalClasses.Length) { throw new ArgumentException("Out levels must be the same length as the input data"); } _resolvedLevels = outLevels.Value; } else { _resolvedLevels = _resolvedLevelsBuffer.Add(_originalClasses.Length); _resolvedLevels.Fill(_paragraphEmbeddingLevel); } // Resolve explicit embedding levels (Rules X1-X8) ResolveExplicitEmbeddingLevels(); // Build the rule X9 map BuildX9RemovalMap(); // Process all isolated run sequences ProcessIsolatedRunSequences(); // Reset whitespace levels ResetWhitespaceLevels(); // Clean up AssignLevelsToCodePointsRemovedByX9(); } /// /// Resolve the paragraph embedding level if not explicitly passed /// by the caller. Also used by rule X5c for FSI isolating sequences. /// /// The data to be evaluated /// The resolved embedding level public sbyte ResolveEmbeddingLevel(ArraySlice data) { // P2 for (var i = 0; i < data.Length; ++i) { switch (data[i]) { case BidiClass.LeftToRight: // P3 return 0; case BidiClass.ArabicLetter: case BidiClass.RightToLeft: // P3 return 1; case BidiClass.FirstStrongIsolate: case BidiClass.LeftToRightIsolate: case BidiClass.RightToLeftIsolate: // Skip isolate pairs // (Because we're working with a slice, we need to adjust the indices // we're using for the isolatePairs map) if (_isolatePairs.TryGetValue(data.Start + i, out i)) { i -= data.Start; } else { i = data.Length; } break; } } // P3 return 0; } /// /// Build a list of matching isolates for a directionality slice /// Implements BD9 /// private void FindIsolatePairs() { // Redundant? if (!_hasIsolates) { return; } // Lets double check this as we go and clear the flag // if there actually aren't any isolate pairs as this might // mean we can skip some later steps _hasIsolates = false; // BD9... _pendingIsolateOpenings.Clear(); for (var i = 0; i < _originalClasses.Length; i++) { var t = _originalClasses[i]; switch (t) { case BidiClass.LeftToRightIsolate: case BidiClass.RightToLeftIsolate: case BidiClass.FirstStrongIsolate: { _pendingIsolateOpenings.Push(i); _hasIsolates = true; break; } case BidiClass.PopDirectionalIsolate: { if (_pendingIsolateOpenings.Count > 0) { _isolatePairs.Add(_pendingIsolateOpenings.Pop(), i); } _hasIsolates = true; break; } } } } /// /// Resolve the explicit embedding levels from the original /// data. Implements rules X1 to X8. /// private void ResolveExplicitEmbeddingLevels() { // Redundant? if (!_hasIsolates && !_hasEmbeddings) { return; } // Work variables _statusStack.Clear(); var overflowIsolateCount = 0; var overflowEmbeddingCount = 0; var validIsolateCount = 0; // Constants const int maxStackDepth = 125; // Rule X1 - setup initial state _statusStack.Clear(); // Neutral _statusStack.Push(new Status(_paragraphEmbeddingLevel, BidiClass.OtherNeutral, false)); // Process all characters for (var i = 0; i < _originalClasses.Length; i++) { switch (_originalClasses[i]) { case BidiClass.RightToLeftEmbedding: { // Rule X2 var newLevel = (sbyte)((_statusStack.Peek().EmbeddingLevel + 1) | 1); if (newLevel <= maxStackDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0) { _statusStack.Push(new Status(newLevel, BidiClass.OtherNeutral, false)); _resolvedLevels[i] = newLevel; } else if (overflowIsolateCount == 0) { overflowEmbeddingCount++; } break; } case BidiClass.LeftToRightEmbedding: { // Rule X3 var newLevel = (sbyte)((_statusStack.Peek().EmbeddingLevel + 2) & ~1); if (newLevel < maxStackDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0) { _statusStack.Push(new Status(newLevel, BidiClass.OtherNeutral, false)); _resolvedLevels[i] = newLevel; } else if (overflowIsolateCount == 0) { overflowEmbeddingCount++; } break; } case BidiClass.RightToLeftOverride: { // Rule X4 var newLevel = (sbyte)((_statusStack.Peek().EmbeddingLevel + 1) | 1); if (newLevel <= maxStackDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0) { _statusStack.Push(new Status(newLevel, BidiClass.RightToLeft, false)); _resolvedLevels[i] = newLevel; } else if (overflowIsolateCount == 0) { overflowEmbeddingCount++; } break; } case BidiClass.LeftToRightOverride: { // Rule X5 var newLevel = (sbyte)((_statusStack.Peek().EmbeddingLevel + 2) & ~1); if (newLevel <= maxStackDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0) { _statusStack.Push(new Status(newLevel, BidiClass.LeftToRight, false)); _resolvedLevels[i] = newLevel; } else if (overflowIsolateCount == 0) { overflowEmbeddingCount++; } break; } case BidiClass.RightToLeftIsolate: case BidiClass.LeftToRightIsolate: case BidiClass.FirstStrongIsolate: { // Rule X5a, X5b and X5c var resolvedIsolate = _originalClasses[i]; if (resolvedIsolate == BidiClass.FirstStrongIsolate) { if (!_isolatePairs.TryGetValue(i, out var endOfIsolate)) { endOfIsolate = _originalClasses.Length; } // Rule X5c if (ResolveEmbeddingLevel(_originalClasses.Slice(i + 1,endOfIsolate - (i + 1))) == 1) { resolvedIsolate = BidiClass.RightToLeftIsolate; } else { resolvedIsolate = BidiClass.LeftToRightIsolate; } } // Replace RLI's level with current embedding level var tos = _statusStack.Peek(); _resolvedLevels[i] = tos.EmbeddingLevel; // Apply override if (tos.OverrideStatus != BidiClass.OtherNeutral) { _workingClasses[i] = tos.OverrideStatus; } // Work out new level sbyte newLevel; if (resolvedIsolate == BidiClass.RightToLeftIsolate) { newLevel = (sbyte)((tos.EmbeddingLevel + 1) | 1); } else { newLevel = (sbyte)((tos.EmbeddingLevel + 2) & ~1); } // Valid? if (newLevel <= maxStackDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0) { validIsolateCount++; _statusStack.Push(new Status(newLevel, BidiClass.OtherNeutral, true)); } else { overflowIsolateCount++; } break; } case BidiClass.BoundaryNeutral: { // Mentioned in rule X6 - "for all types besides ..., BN, ..." // no-op break; } default: { // Rule X6 var tos = _statusStack.Peek(); _resolvedLevels[i] = tos.EmbeddingLevel; if (tos.OverrideStatus != BidiClass.OtherNeutral) { _workingClasses[i] = tos.OverrideStatus; } break; } case BidiClass.PopDirectionalIsolate: { // Rule X6a if (overflowIsolateCount > 0) { overflowIsolateCount--; } else if (validIsolateCount != 0) { overflowEmbeddingCount = 0; while (!_statusStack.Peek().IsolateStatus) { _statusStack.Pop(); } _statusStack.Pop(); validIsolateCount--; } var tos = _statusStack.Peek(); _resolvedLevels[i] = tos.EmbeddingLevel; if (tos.OverrideStatus != BidiClass.OtherNeutral) { _workingClasses[i] = tos.OverrideStatus; } break; } case BidiClass.PopDirectionalFormat: { // Rule X7 if (overflowIsolateCount == 0) { if (overflowEmbeddingCount > 0) { overflowEmbeddingCount--; } else if (!_statusStack.Peek().IsolateStatus && _statusStack.Count >= 2) { _statusStack.Pop(); } } break; } case BidiClass.ParagraphSeparator: { // Rule X8 _resolvedLevels[i] = _paragraphEmbeddingLevel; break; } } } } /// /// Build a map to the original data positions that excludes all /// the types defined by rule X9 /// private void BuildX9RemovalMap() { // Reserve room for the x9 map _x9Map.Length = _originalClasses.Length; if (_hasEmbeddings || _hasIsolates) { // Build a map the removes all x9 characters var j = 0; for (var i = 0; i < _originalClasses.Length; i++) { if (!IsRemovedByX9(_originalClasses[i])) { _x9Map[j++] = i; } } // Set the final length _x9Map.Length = j; } else { for (int i = 0, count = _originalClasses.Length; i < count; i++) { _x9Map[i] = i; } } } /// /// Find the original character index for an entry in the X9 map /// /// Index in the x9 removal map /// Index to the original data [MethodImpl(MethodImplOptions.AggressiveInlining)] private int MapX9(int index) => _x9Map[index]; /// /// Add a new level run /// /// /// This method resolves the sos and eos values for the run /// and adds the run to the list /// /// /// The index of the start of the run (in x9 removed units) /// The length of the run (in x9 removed units) /// The level of the run private void AddLevelRun(int start, int length, int level) { // Get original indices to first and last character in this run var firstCharIndex = MapX9(start); var lastCharIndex = MapX9(start + length - 1); // Work out sos var i = firstCharIndex - 1; while (i >= 0 && IsRemovedByX9(_originalClasses[i])) { i--; } var prevLevel = i < 0 ? _paragraphEmbeddingLevel : _resolvedLevels[i]; var sos = DirectionFromLevel(Math.Max(prevLevel, level)); // Work out eos var lastType = _workingClasses[lastCharIndex]; int nextLevel; switch (lastType) { case BidiClass.LeftToRightIsolate: case BidiClass.RightToLeftIsolate: case BidiClass.FirstStrongIsolate: { nextLevel = _paragraphEmbeddingLevel; break; } default: { i = lastCharIndex + 1; while (i < _originalClasses.Length && IsRemovedByX9(_originalClasses[i])) { i++; } nextLevel = i >= _originalClasses.Length ? _paragraphEmbeddingLevel : _resolvedLevels[i]; break; } } var eos = DirectionFromLevel(Math.Max(nextLevel, level)); // Add the run _levelRuns.Add(new LevelRun(start, length, level, sos, eos)); } /// /// Find all runs of the same level, populating the _levelRuns /// collection /// private void FindLevelRuns() { var currentLevel = -1; var runStart = 0; for (var i = 0; i < _x9Map.Length; ++i) { int level = _resolvedLevels[MapX9(i)]; if (level == currentLevel) { continue; } if (currentLevel != -1) { AddLevelRun(runStart, i - runStart, currentLevel); } currentLevel = level; runStart = i; } // Don't forget the final level run if (currentLevel != -1) { AddLevelRun(runStart, _x9Map.Length - runStart, currentLevel); } } /// /// Given a character index, find the level run that starts at that position /// /// The index into the original (unmapped) data /// The index of the run that starts at that index private int FindRunForIndex(int index) { for (var i = 0; i < _levelRuns.Count; i++) { // Passed index is for the original non-x9 filtered data, however // the level run ranges are for the x9 filtered data. Convert before // comparing if (MapX9(_levelRuns[i].Start) == index) { return i; } } throw new InvalidOperationException("Internal error"); } /// /// Determine and the process all isolated run sequences /// private void ProcessIsolatedRunSequences() { // Find all runs with the same level FindLevelRuns(); // Process them one at a time by first building // a mapping using slices from the x9 map for each // run section that needs to be joined together to // form an complete run. That full run mapping // will be placed in _isolatedRunMapping and then // processed by ProcessIsolatedRunSequence(). while (_levelRuns.Count > 0) { // Clear the mapping _isolatedRunMapping.Clear(); // Combine mappings from this run and all runs that continue on from it var runIndex = 0; BidiClass eos; var sos = _levelRuns[0].Sos; var level = _levelRuns[0].Level; while (true) { // Get the run var r = _levelRuns[runIndex]; // The eos of the isolating run is the eos of the // last level run that comprises it. eos = r.Eos; // Remove this run as we've now processed it _levelRuns.RemoveAt(runIndex); // Add the x9 map indices for the run range to the mapping // for this isolated run _isolatedRunMapping.Add(_x9Map.AsSlice(r.Start, r.Length)); // Get the last character and see if it's an isolating run with a matching // PDI and concatenate that run to this one var lastCharacterIndex = _isolatedRunMapping[_isolatedRunMapping.Length - 1]; var lastType = _originalClasses[lastCharacterIndex]; if ((lastType == BidiClass.LeftToRightIsolate || lastType == BidiClass.RightToLeftIsolate || lastType == BidiClass.FirstStrongIsolate) && _isolatePairs.TryGetValue(lastCharacterIndex, out var nextRunIndex)) { // Find the continuing run index runIndex = FindRunForIndex(nextRunIndex); } else { break; } } // Process this isolated run ProcessIsolatedRunSequence(sos, eos, level); } } /// /// Process a single isolated run sequence, where the character sequence /// mapping is currently held in _isolatedRunMapping. /// private void ProcessIsolatedRunSequence(BidiClass sos, BidiClass eos, int runLevel) { // Create mappings onto the underlying data _runResolvedClasses = new MappedArraySlice(_workingClasses, _isolatedRunMapping.AsSlice()); _runOriginalClasses = new MappedArraySlice(_originalClasses, _isolatedRunMapping.AsSlice()); _runLevels = new MappedArraySlice(_resolvedLevels, _isolatedRunMapping.AsSlice()); if (_hasBrackets) { _runBiDiPairedBracketTypes = new MappedArraySlice(_pairedBracketTypes, _isolatedRunMapping.AsSlice()); _runPairedBracketValues = new MappedArraySlice(_pairedBracketValues, _isolatedRunMapping.AsSlice()); } _runLevel = runLevel; _runDirection = DirectionFromLevel(runLevel); _runLength = _runResolvedClasses.Length; // By tracking the types of characters known to be in the current run, we can // skip some of the rules that we know won't apply. The flags will be // initialized while we're processing rule W1 below. var hasEN = false; var hasAL = false; var hasES = false; var hasCS = false; var hasAN = false; var hasET = false; // Rule W1 // Also, set hasXX flags int i; var previousClass = sos; for (i = 0; i < _runLength; i++) { var resolvedClass = _runResolvedClasses[i]; switch (resolvedClass) { case BidiClass.NonspacingMark: _runResolvedClasses[i] = previousClass; break; case BidiClass.LeftToRightIsolate: case BidiClass.RightToLeftIsolate: case BidiClass.FirstStrongIsolate: case BidiClass.PopDirectionalIsolate: previousClass = BidiClass.OtherNeutral; break; case BidiClass.EuropeanNumber: hasEN = true; previousClass = resolvedClass; break; case BidiClass.ArabicLetter: hasAL = true; previousClass = resolvedClass; break; case BidiClass.EuropeanSeparator: hasES = true; previousClass = resolvedClass; break; case BidiClass.CommonSeparator: hasCS = true; previousClass = resolvedClass; break; case BidiClass.ArabicNumber: hasAN = true; previousClass = resolvedClass; break; case BidiClass.EuropeanTerminator: hasET = true; previousClass = resolvedClass; break; default: previousClass = resolvedClass; break; } } // Rule W2 if (hasEN) { for (i = 0; i < _runLength; i++) { if (_runResolvedClasses[i] != BidiClass.EuropeanNumber) { continue; } for (var j = i - 1; j >= 0; j--) { var resolvedClass = _runResolvedClasses[j]; switch (resolvedClass) { case BidiClass.LeftToRight: case BidiClass.RightToLeft: case BidiClass.ArabicLetter: { if (resolvedClass == BidiClass.ArabicLetter) { _runResolvedClasses[i] = BidiClass.ArabicNumber; hasAN = true; } j = -1; break; } } } } } // Rule W3 if (hasAL) { for (i = 0; i < _runLength; i++) { if (_runResolvedClasses[i] == BidiClass.ArabicLetter) { _runResolvedClasses[i] = BidiClass.RightToLeft; } } } // Rule W4 if ((hasES || hasCS) && (hasEN || hasAN)) { for (i = 1; i < _runLength - 1; ++i) { ref var resolvedClass = ref _runResolvedClasses[i]; if (resolvedClass == BidiClass.EuropeanSeparator) { var previousSeparatorClass = _runResolvedClasses[i - 1]; var nextSeparatorClass = _runResolvedClasses[i + 1]; if (previousSeparatorClass == BidiClass.EuropeanNumber && nextSeparatorClass == BidiClass.EuropeanNumber) { // ES between EN and EN resolvedClass = BidiClass.EuropeanNumber; } } else if (resolvedClass == BidiClass.CommonSeparator) { var previousSeparatorClass = _runResolvedClasses[i - 1]; var nextSeparatorClass = _runResolvedClasses[i + 1]; if ((previousSeparatorClass == BidiClass.ArabicNumber && nextSeparatorClass == BidiClass.ArabicNumber) || (previousSeparatorClass == BidiClass.EuropeanNumber && nextSeparatorClass == BidiClass.EuropeanNumber)) { // CS between (AN and AN) or (EN and EN) resolvedClass = previousSeparatorClass; } } } } // Rule W5 if (hasET && hasEN) { for (i = 0; i < _runLength; ++i) { if (_runResolvedClasses[i] != BidiClass.EuropeanTerminator) { continue; } // Locate end of sequence var sequenceStart = i; var sequenceEnd = i; while (sequenceEnd < _runLength && _runResolvedClasses[sequenceEnd] == BidiClass.EuropeanTerminator) { sequenceEnd++; } // Preceded by, or followed by EN? if ((sequenceStart == 0 ? sos : _runResolvedClasses[sequenceStart - 1]) == BidiClass.EuropeanNumber || (sequenceEnd == _runLength ? eos : _runResolvedClasses[sequenceEnd]) == BidiClass.EuropeanNumber) { // Change the entire range for (var j = sequenceStart; i < sequenceEnd; ++i) { _runResolvedClasses[i] = BidiClass.EuropeanNumber; } } // continue at end of sequence i = sequenceEnd; } } // Rule W6 if (hasES || hasET || hasCS) { for (i = 0; i < _runLength; ++i) { ref var resolvedClass = ref _runResolvedClasses[i]; switch (resolvedClass) { case BidiClass.EuropeanSeparator: case BidiClass.EuropeanTerminator: case BidiClass.CommonSeparator: { resolvedClass = BidiClass.OtherNeutral; break; } } } } // Rule W7. if (hasEN) { var previousStrongClass = sos; for (i = 0; i < _runLength; ++i) { ref var resolvedClass = ref _runResolvedClasses[i]; switch (resolvedClass) { case BidiClass.EuropeanNumber: { // If prev strong type was an L change this to L too if (previousStrongClass == BidiClass.LeftToRight) { _runResolvedClasses[i] = BidiClass.LeftToRight; } break; } case BidiClass.LeftToRight: case BidiClass.RightToLeft: { // Remember previous strong type (NB: AL should already be changed to R) previousStrongClass = resolvedClass; break; } } } } // Rule N0 - process bracket pairs if (_hasBrackets) { int count; var pairedBrackets = LocatePairedBrackets(); for (i = 0, count = pairedBrackets.Count; i < count; i++) { var pairedBracket = pairedBrackets[i]; var strongDirection = InspectPairedBracket(pairedBracket); // Case "d" - no strong types in the brackets, ignore if (strongDirection == BidiClass.OtherNeutral) { continue; } // Case "b" - strong type found that matches the embedding direction if ((strongDirection == BidiClass.LeftToRight || strongDirection == BidiClass.RightToLeft) && strongDirection == _runDirection) { SetPairedBracketDirection(pairedBracket, strongDirection); continue; } // Case "c" - found opposite strong type found, look before to establish context strongDirection = InspectBeforePairedBracket(pairedBracket, sos); if (strongDirection == _runDirection || strongDirection == BidiClass.OtherNeutral) { strongDirection = _runDirection; } SetPairedBracketDirection(pairedBracket, strongDirection); } } // Rules N1 and N2 - resolve neutral types for (i = 0; i < _runLength; ++i) { var resolvedClass = _runResolvedClasses[i]; if (IsNeutralClass(resolvedClass)) { // Locate end of sequence var seqStart = i; var seqEnd = i; while (seqEnd < _runLength && IsNeutralClass(_runResolvedClasses[seqEnd])) { seqEnd++; } // Work out the preceding class BidiClass classBefore; if (seqStart == 0) { classBefore = sos; } else { classBefore = _runResolvedClasses[seqStart - 1]; switch (classBefore) { case BidiClass.ArabicNumber: case BidiClass.EuropeanNumber: { classBefore = BidiClass.RightToLeft; break; } } } // Work out the following class BidiClass classAfter; if (seqEnd == _runLength) { classAfter = eos; } else { classAfter = _runResolvedClasses[seqEnd]; switch (classAfter) { case BidiClass.ArabicNumber: case BidiClass.EuropeanNumber: { classAfter = BidiClass.RightToLeft; break; } } } // Work out the final resolved type BidiClass finalResolveClass; if (classBefore == classAfter) { // Rule N1 finalResolveClass = classBefore; } else { // Rule N2 finalResolveClass = _runDirection; } // Apply changes for (var j = seqStart; j < seqEnd; j++) { _runResolvedClasses[j] = finalResolveClass; } // continue after this run i = seqEnd; } } // Rules I1 and I2 - resolve implicit types if ((_runLevel & 0x01) == 0) { // Rule I1 - even for (i = 0; i < _runLength; i++) { var resolvedClass = _runResolvedClasses[i]; ref var currentRunLevel = ref _runLevels[i]; switch (resolvedClass) { case BidiClass.RightToLeft: { currentRunLevel++; break; } case BidiClass.ArabicNumber: case BidiClass.EuropeanNumber: { currentRunLevel += 2; break; } } } } else { // Rule I2 - odd for (i = 0; i < _runLength; i++) { var resolvedClass = _runResolvedClasses[i]; ref var currentRunLevel = ref _runLevels[i]; if (resolvedClass != BidiClass.RightToLeft) { currentRunLevel++; } } } } /// /// Locate all pair brackets in the current isolating run /// /// A sorted list of BracketPairs private List LocatePairedBrackets() { // Clear work collections _pendingOpeningBrackets.Clear(); _pairedBrackets.Clear(); // Since List.Sort is expensive on memory if called often (it internally // allocates an ArraySorted object) and since we will rarely have many // items in this list (most paragraphs will only have a handful of bracket // pairs - if that), we use a simple linear lookup and insert most of the // time. If there are more that `sortLimit` paired brackets we abort th // linear searching/inserting and using List.Sort at the end. const int sortLimit = 8; // Process all characters in the run, looking for paired brackets for (int i = 0, length = _runLength; i < length; i++) { // Ignore non-neutral characters if (_runResolvedClasses[i] != BidiClass.OtherNeutral) { continue; } switch (_runBiDiPairedBracketTypes[i]) { case BidiPairedBracketType.Open: if (_pendingOpeningBrackets.Count == MaxPairedBracketDepth) { goto exit; } _pendingOpeningBrackets.Insert(0, i); break; case BidiPairedBracketType.Close: // see if there is a match for (var j = 0; j < _pendingOpeningBrackets.Count; j++) { if (_runPairedBracketValues[i] != _runPairedBracketValues[_pendingOpeningBrackets[j]]) { continue; } // Add this paired bracket set var opener = _pendingOpeningBrackets[j]; if (_pairedBrackets.Count < sortLimit) { var ppi = 0; while (ppi < _pairedBrackets.Count && _pairedBrackets[ppi].OpeningIndex < opener) { ppi++; } _pairedBrackets.Insert(ppi, new BracketPair(opener, i)); } else { _pairedBrackets.Add(new BracketPair(opener, i)); } // remove up to and including matched opener _pendingOpeningBrackets.RemoveRange(0, j + 1); break; } break; } } exit: // Is a sort pending? if (_pairedBrackets.Count > sortLimit) { _pairedBrackets.Sort(); } return _pairedBrackets; } /// /// Inspect a paired bracket set and determine its strong direction /// /// The paired bracket to be inspected /// The direction of the bracket set content private BidiClass InspectPairedBracket(in BracketPair bracketPair) { var directionFromLevel = DirectionFromLevel(_runLevel); var directionOpposite = BidiClass.OtherNeutral; for (var i = bracketPair.OpeningIndex + 1; i < bracketPair.ClosingIndex; i++) { var dir = GetStrongClassN0(_runResolvedClasses[i]); if (dir == BidiClass.OtherNeutral) { continue; } if (dir == directionFromLevel) { return dir; } directionOpposite = dir; } return directionOpposite; } /// /// Look for a strong type before a paired bracket /// /// The paired bracket set to be inspected /// The sos in case nothing found before the bracket /// The strong direction before the brackets private BidiClass InspectBeforePairedBracket(in BracketPair bracketPair, BidiClass sos) { for (var i = bracketPair.OpeningIndex - 1; i >= 0; --i) { var direction = GetStrongClassN0(_runResolvedClasses[i]); if (direction != BidiClass.OtherNeutral) { return direction; } } return sos; } /// /// Sets the direction of a bracket pair, including setting the direction of /// NSM's inside the brackets and following. /// /// The paired brackets /// The resolved direction for the bracket pair private void SetPairedBracketDirection(in BracketPair pairedBracket, BidiClass direction) { // Set the direction of the brackets _runResolvedClasses[pairedBracket.OpeningIndex] = direction; _runResolvedClasses[pairedBracket.ClosingIndex] = direction; // Set the directionality of NSM's inside the brackets // BN characters (such as ZWJ or ZWSP) that appear between the base bracket character // and the nonspacing mark should be ignored. for (int i = pairedBracket.OpeningIndex + 1; i < pairedBracket.ClosingIndex; i++) { if (_runOriginalClasses[i] == BidiClass.NonspacingMark) { _runOriginalClasses[i] = direction; } else if (_runOriginalClasses[i] != BidiClass.BoundaryNeutral) { break; } } // Set the directionality of NSM's following the brackets for (int i = pairedBracket.ClosingIndex + 1; i < _runLength; i++) { if (_runOriginalClasses[i] == BidiClass.NonspacingMark) { _runOriginalClasses[i] = direction; } else if (_runOriginalClasses[i] != BidiClass.BoundaryNeutral) { break; } } } /// /// Resets whitespace levels. Implements rule L1 /// private void ResetWhitespaceLevels() { for (var i = 0; i < _resolvedLevels.Length; i++) { var originalClass = _originalClasses[i]; switch (originalClass) { case BidiClass.ParagraphSeparator: case BidiClass.SegmentSeparator: { // Rule L1, clauses one and two. _resolvedLevels[i] = _paragraphEmbeddingLevel; // Rule L1, clause three. for (var j = i - 1; j >= 0; --j) { if (IsWhitespace(_originalClasses[j])) { // including format codes _resolvedLevels[j] = _paragraphEmbeddingLevel; } else { break; } } break; } } } // Rule L1, clause four. for (var j = _resolvedLevels.Length - 1; j >= 0; j--) { if (IsWhitespace(_originalClasses[j])) { // including format codes _resolvedLevels[j] = _paragraphEmbeddingLevel; } else { break; } } } /// /// Assign levels to any characters that would be have been /// removed by rule X9. The idea is to keep level runs together /// that would otherwise be broken by an interfering isolate/embedding /// control character. /// private void AssignLevelsToCodePointsRemovedByX9() { // Redundant? if (!_hasIsolates && !_hasEmbeddings) { return; } // No-op? if (_workingClasses.Length == 0) { return; } // Fix up first character if (_resolvedLevels[0] < 0) { _resolvedLevels[0] = _paragraphEmbeddingLevel; } if (IsRemovedByX9(_originalClasses[0])) { _workingClasses[0] = _originalClasses[0]; } for (int i = 1, length = _workingClasses.Length; i < length; i++) { var originalClass = _originalClasses[i]; if (IsRemovedByX9(originalClass)) { _workingClasses[i] = originalClass; _resolvedLevels[i] = _resolvedLevels[i - 1]; } } } /// /// Check if a directionality type represents whitespace /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool IsWhitespace(BidiClass biDiClass) { switch (biDiClass) { case BidiClass.LeftToRightEmbedding: case BidiClass.RightToLeftEmbedding: case BidiClass.LeftToRightOverride: case BidiClass.RightToLeftOverride: case BidiClass.PopDirectionalFormat: case BidiClass.LeftToRightIsolate: case BidiClass.RightToLeftIsolate: case BidiClass.FirstStrongIsolate: case BidiClass.PopDirectionalIsolate: case BidiClass.BoundaryNeutral: case BidiClass.WhiteSpace: return true; default: return false; } } /// /// Convert a level to a direction where odd is RTL and /// even is LTR /// /// The level to convert /// A directionality [MethodImpl(MethodImplOptions.AggressiveInlining)] private static BidiClass DirectionFromLevel(int level) => ((level & 0x1) == 0) ? BidiClass.LeftToRight : BidiClass.RightToLeft; /// /// Helper to check if a directionality is removed by rule X9 /// /// The bidi type to check /// True if rule X9 would remove this character; otherwise false [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool IsRemovedByX9(BidiClass biDiClass) { switch (biDiClass) { case BidiClass.LeftToRightEmbedding: case BidiClass.RightToLeftEmbedding: case BidiClass.LeftToRightOverride: case BidiClass.RightToLeftOverride: case BidiClass.PopDirectionalFormat: case BidiClass.BoundaryNeutral: return true; default: return false; } } /// /// Check if a a directionality is neutral for rules N1 and N2 /// [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool IsNeutralClass(BidiClass direction) { switch (direction) { case BidiClass.ParagraphSeparator: case BidiClass.SegmentSeparator: case BidiClass.WhiteSpace: case BidiClass.OtherNeutral: case BidiClass.RightToLeftIsolate: case BidiClass.LeftToRightIsolate: case BidiClass.FirstStrongIsolate: case BidiClass.PopDirectionalIsolate: return true; default: return false; } } /// /// Maps a direction to a strong class for rule N0 /// /// The direction to map /// A strong direction - R, L or ON [MethodImpl(MethodImplOptions.AggressiveInlining)] private static BidiClass GetStrongClassN0(BidiClass direction) { switch (direction) { case BidiClass.EuropeanNumber: case BidiClass.ArabicNumber: case BidiClass.ArabicLetter: case BidiClass.RightToLeft: return BidiClass.RightToLeft; case BidiClass.LeftToRight: return BidiClass.LeftToRight; default: return BidiClass.OtherNeutral; } } /// /// Hold the start and end index of a pair of brackets /// private readonly struct BracketPair : IComparable { /// /// Initializes a new instance of the struct. /// /// Index of the opening bracket /// Index of the closing bracket public BracketPair(int openingIndex, int closingIndex) { OpeningIndex = openingIndex; ClosingIndex = closingIndex; } /// /// Gets the index of the opening bracket /// public int OpeningIndex { get; } /// /// Gets the index of the closing bracket /// public int ClosingIndex { get; } public int CompareTo(BracketPair other) => OpeningIndex.CompareTo(other.OpeningIndex); } /// /// Status stack entry used while resolving explicit /// embedding levels /// private readonly struct Status { public Status(sbyte embeddingLevel, BidiClass overrideStatus, bool isolateStatus) { EmbeddingLevel = embeddingLevel; OverrideStatus = overrideStatus; IsolateStatus = isolateStatus; } public sbyte EmbeddingLevel { get; } public BidiClass OverrideStatus { get; } public bool IsolateStatus { get; } } /// /// Provides information about a level run - a continuous /// sequence of equal levels. /// private readonly struct LevelRun { public LevelRun(int start, int length, int level, BidiClass sos, BidiClass eos) { Start = start; Length = length; Level = level; Sos = sos; Eos = eos; } public int Start { get; } public int Length { get; } public int Level { get; } public BidiClass Sos { get; } public BidiClass Eos { get; } } } }