From f67b781c55ab65e0ddf6b632ebbb36eb354954ab Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Sun, 13 Oct 2024 20:01:33 +0300 Subject: [PATCH 1/6] Replace generic state machine index types with dynamic dispatch. --- src/FarkleNeo/Buffers/BufferExtensions.cs | 40 +--- .../StateMachines/DfaImplementationBase.cs | 27 ++- .../StateMachines/DfaWithConflicts.cs | 59 +++--- .../StateMachines/DfaWithoutConflicts.cs | 51 +++--- .../StateMachines/LrImplementationBase.cs | 39 ++-- .../Grammars/StateMachines/LrWithConflicts.cs | 82 ++++----- .../StateMachines/LrWithoutConflicts.cs | 69 +++---- .../StateMachines/StateMachineUtilities.cs | 172 +----------------- 8 files changed, 166 insertions(+), 373 deletions(-) diff --git a/src/FarkleNeo/Buffers/BufferExtensions.cs b/src/FarkleNeo/Buffers/BufferExtensions.cs index d09a4bc3..6a809ba8 100644 --- a/src/FarkleNeo/Buffers/BufferExtensions.cs +++ b/src/FarkleNeo/Buffers/BufferExtensions.cs @@ -58,22 +58,18 @@ public static ulong ReadUInt64(this ReadOnlySpan buffer, int index) => BinaryPrimitives.ReadUInt64LittleEndian(buffer[index..]); // Signed integers of variable size must be read with this method to ensure they are sign-extended. 
- public static int ReadIntVariableSize(this ReadOnlySpan buffer, int index) + public static int ReadIntVariableSize(this ReadOnlySpan buffer, int index, byte dataSize) { - if (typeof(T) == typeof(sbyte)) - { - return (sbyte)buffer[index]; - } - if (typeof(T) == typeof(short)) - { - return (short)buffer.ReadUInt16(index); - } - if (typeof(T) == typeof(int)) + switch (dataSize) { - return buffer.ReadInt32(index); + case 1: + return (sbyte)buffer[index]; + case 2: + return (short)buffer.ReadUInt16(index); + default: + Debug.Assert(dataSize == 4); + return buffer.ReadInt32(index); } - - throw new NotSupportedException("Unsupported type."); } public static uint ReadUIntVariableSize(this ReadOnlySpan buffer, int index, byte dataSize) @@ -90,24 +86,6 @@ public static uint ReadUIntVariableSize(this ReadOnlySpan buffer, int inde } } - public static uint ReadUIntVariableSize(this ReadOnlySpan buffer, int index) - { - if (typeof(T) == typeof(byte)) - { - return buffer[index]; - } - if (typeof(T) == typeof(ushort)) - { - return buffer.ReadUInt16(index); - } - if (typeof(T) == typeof(uint)) - { - return buffer.ReadUInt32(index); - } - - throw new NotSupportedException("Unsupported type."); - } - public static void WriteBlobLength(this IBufferWriter buffer, int value) { switch ((uint)value) diff --git a/src/FarkleNeo/Grammars/StateMachines/DfaImplementationBase.cs b/src/FarkleNeo/Grammars/StateMachines/DfaImplementationBase.cs index b950f43e..8ad54c6e 100644 --- a/src/FarkleNeo/Grammars/StateMachines/DfaImplementationBase.cs +++ b/src/FarkleNeo/Grammars/StateMachines/DfaImplementationBase.cs @@ -2,14 +2,15 @@ // SPDX-License-Identifier: MIT using Farkle.Buffers; -using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; namespace Farkle.Grammars.StateMachines; -internal unsafe abstract class DfaImplementationBase : Dfa where TChar : unmanaged, IComparable +internal unsafe abstract class DfaImplementationBase : Dfa where TChar : 
unmanaged, IComparable { + protected readonly byte _stateIndexSize, _edgeIndexSize, _tokenSymbolIndexSize; + protected readonly int _edgeCount; public required int FirstEdgeBase { get; init; } @@ -22,26 +23,32 @@ internal unsafe abstract class DfaImplementationBase : Dfa public required int DefaultTransitionBase { get; init; } - protected DfaImplementationBase(Grammar grammar, int stateCount, int edgeCount, bool hasConflicts) : base(stateCount, hasConflicts) + public required int AcceptBase { get; init; } + + protected DfaImplementationBase(Grammar grammar, int stateCount, int edgeCount, int tokenSymbolCount, bool hasConflicts) : base(stateCount, hasConflicts) { - Debug.Assert(GrammarUtilities.GetCompressedIndexSize(stateCount) == sizeof(TState)); - Debug.Assert(GrammarUtilities.GetCompressedIndexSize(edgeCount) == sizeof(TEdge)); + _stateIndexSize = GrammarUtilities.GetCompressedIndexSize(stateCount); + _edgeIndexSize = GrammarUtilities.GetCompressedIndexSize(edgeCount); + _tokenSymbolIndexSize = GrammarUtilities.GetCompressedIndexSize(tokenSymbolCount); Grammar = grammar; _edgeCount = edgeCount; } protected int ReadFirstEdge(ReadOnlySpan grammarFile, int state) => - (int)grammarFile.ReadUIntVariableSize(FirstEdgeBase + state * sizeof(TEdge)); + (int)grammarFile.ReadUIntVariableSize(FirstEdgeBase + state * _edgeIndexSize, _edgeIndexSize); + + protected int ReadState(ReadOnlySpan grammarFile, int @base, int index) => + (int)grammarFile.ReadUIntVariableSize(@base + index * _stateIndexSize, _stateIndexSize) - 1; - protected static int ReadState(ReadOnlySpan grammarFile, int @base) => - (int)grammarFile.ReadUIntVariableSize(@base) - 1; + protected TokenSymbolHandle ReadAcceptSymbol(ReadOnlySpan grammarFile, int index) => + new(grammarFile.ReadUIntVariableSize(AcceptBase + index * _tokenSymbolIndexSize, _tokenSymbolIndexSize)); internal sealed override Grammar Grammar { get; } private int GetDefaultTransitionUnsafe(ReadOnlySpan grammarFile, int state) { - return 
ReadState(grammarFile, DefaultTransitionBase + state * sizeof(TState)); + return ReadState(grammarFile, DefaultTransitionBase, state); } private (int Offset, int Count) GetEdgeBoundsUnsafe(ReadOnlySpan grammarFile, int state) @@ -55,7 +62,7 @@ private DfaEdge GetEdgeAtUnsafe(ReadOnlySpan grammarFile, int index { TChar cFrom = StateMachineUtilities.Read(grammarFile, RangeFromBase + index * sizeof(char)); TChar cTo = StateMachineUtilities.Read(grammarFile, RangeToBase + index * sizeof(char)); - int target = ReadState(grammarFile, EdgeTargetBase + index * sizeof(TState)); + int target = ReadState(grammarFile, EdgeTargetBase, + index); return new(cFrom, cTo, target); } diff --git a/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs b/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs index 5b3c221a..1d0abf75 100644 --- a/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs +++ b/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs @@ -2,35 +2,32 @@ // SPDX-License-Identifier: MIT using Farkle.Buffers; -using System.Diagnostics; using System.Diagnostics.CodeAnalysis; namespace Farkle.Grammars.StateMachines; -internal unsafe sealed class DfaWithConflicts : DfaImplementationBase where TChar : unmanaged, IComparable +internal unsafe sealed class DfaWithConflicts : DfaImplementationBase where TChar : unmanaged, IComparable { + private readonly byte _acceptIndexSize; + private readonly int _acceptCount; internal required int FirstAcceptBase { get; init; } - internal required int AcceptBase { get; init; } - - public DfaWithConflicts(Grammar grammar, int stateCount, int edgeCount, int acceptCount) : base(grammar, stateCount, edgeCount, true) + [SetsRequiredMembers] + public DfaWithConflicts(Grammar grammar, int stateCount, int edgeCount, int acceptCount, int tokenSymbolCount, GrammarFileSection dfa, GrammarFileSection dfaDefaultTransitions) + : base(grammar, stateCount, edgeCount, tokenSymbolCount, true) { - 
Debug.Assert(GrammarUtilities.GetCompressedIndexSize(acceptCount) == sizeof(TAccept)); - + _acceptIndexSize = GrammarUtilities.GetCompressedIndexSize(acceptCount); _acceptCount = acceptCount; - } - public static DfaWithConflicts Create(Grammar grammar, int stateCount, int edgeCount, int acceptCount, GrammarFileSection dfa, GrammarFileSection dfaDefaultTransitions) - { int expectedSize = sizeof(uint) * 3 - + stateCount * sizeof(TEdge) + + stateCount * _edgeIndexSize + edgeCount * sizeof(TChar) * 2 - + edgeCount * sizeof(TState) - + stateCount * sizeof(TAccept) - + acceptCount * sizeof(TTokenSymbol); + + edgeCount * _stateIndexSize + + stateCount * _acceptIndexSize + + acceptCount * _tokenSymbolIndexSize; if (dfa.Length != expectedSize) { @@ -38,34 +35,31 @@ public static DfaWithConflicts Crea } int firstEdgeBase = dfa.Offset + sizeof(uint) * 3; - int rangeFromBase = firstEdgeBase + stateCount * sizeof(TEdge); + int rangeFromBase = firstEdgeBase + stateCount * _edgeIndexSize; int rangeToBase = rangeFromBase + edgeCount * sizeof(TChar); int edgeTargetBase = rangeToBase + edgeCount * sizeof(TChar); - int firstAcceptBase = edgeTargetBase + edgeCount * sizeof(TState); - int acceptBase = firstAcceptBase + stateCount * sizeof(TAccept); + int firstAcceptBase = edgeTargetBase + edgeCount * _stateIndexSize; + int acceptBase = firstAcceptBase + stateCount * _acceptIndexSize; if (dfaDefaultTransitions.Length > 0) { - if (dfaDefaultTransitions.Length != stateCount * sizeof(TState)) + if (dfaDefaultTransitions.Length != stateCount * _stateIndexSize) { ThrowHelpers.ThrowInvalidDfaDataSize(); } } - return new(grammar, stateCount, edgeCount, acceptCount) - { - FirstEdgeBase = firstEdgeBase, - RangeFromBase = rangeFromBase, - RangeToBase = rangeToBase, - EdgeTargetBase = edgeTargetBase, - DefaultTransitionBase = dfaDefaultTransitions.Offset, - FirstAcceptBase = firstAcceptBase, - AcceptBase = acceptBase - }; + FirstEdgeBase = firstEdgeBase; + RangeFromBase = rangeFromBase; + 
RangeToBase = rangeToBase; + EdgeTargetBase = edgeTargetBase; + DefaultTransitionBase = dfaDefaultTransitions.Offset; + AcceptBase = acceptBase; + FirstAcceptBase = firstAcceptBase; } private int ReadFirstAccept(ReadOnlySpan grammarFile, int state) => - (int)grammarFile.ReadUIntVariableSize(FirstAcceptBase + state * sizeof(TAccept)); + (int)grammarFile.ReadUIntVariableSize(FirstAcceptBase + state * _acceptIndexSize, _acceptIndexSize); private (int Offset, int Count) GetAcceptSymbolBoundsUnsafe(ReadOnlySpan grammarFile, int state) { @@ -74,9 +68,6 @@ private int ReadFirstAccept(ReadOnlySpan grammarFile, int state) => return (firstAccept, nextFirstAccept - firstAccept); } - private TokenSymbolHandle GetAcceptSymbolAtUnsafe(ReadOnlySpan grammarFile, int index) => - new(grammarFile.ReadUIntVariableSize(AcceptBase + index * sizeof(TTokenSymbol))); - internal override (int Offset, int Count) GetAcceptSymbolBounds(int state) { ValidateStateIndex(state); @@ -89,7 +80,7 @@ internal override TokenSymbolHandle GetAcceptSymbolAt(int index) { ThrowHelpers.ThrowArgumentOutOfRangeException(nameof(index)); } - return GetAcceptSymbolAtUnsafe(Grammar.GrammarFile, index); + return ReadAcceptSymbol(Grammar.GrammarFile, index); } internal override void ValidateContent(ReadOnlySpan grammarFile, in GrammarTables grammarTables) @@ -111,7 +102,7 @@ internal override void ValidateContent(ReadOnlySpan grammarFile, in Gramma for (int i = 0; i < _acceptCount; i++) { - TokenSymbolHandle acceptSymbol = GetAcceptSymbolAtUnsafe(grammarFile, i); + TokenSymbolHandle acceptSymbol = ReadAcceptSymbol(grammarFile, i); grammarTables.ValidateHandle(acceptSymbol); } diff --git a/src/FarkleNeo/Grammars/StateMachines/DfaWithoutConflicts.cs b/src/FarkleNeo/Grammars/StateMachines/DfaWithoutConflicts.cs index 3268cae1..b7c37d22 100644 --- a/src/FarkleNeo/Grammars/StateMachines/DfaWithoutConflicts.cs +++ b/src/FarkleNeo/Grammars/StateMachines/DfaWithoutConflicts.cs @@ -1,15 +1,14 @@ // Copyright © Theodore 
Tsirpanis and Contributors. // SPDX-License-Identifier: MIT +using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; using Farkle.Buffers; namespace Farkle.Grammars.StateMachines; -internal unsafe sealed class DfaWithoutConflicts(Grammar grammar, int stateCount, int edgeCount) : DfaImplementationBase(grammar, stateCount, edgeCount, false) where TChar : unmanaged, IComparable +internal unsafe sealed class DfaWithoutConflicts : DfaImplementationBase where TChar : unmanaged, IComparable { - internal required int AcceptBase { get; init; } - /// /// A lookup table with the next state for each ASCII character, for each starting state. /// @@ -35,14 +34,16 @@ private static char CastChar(TChar c) private static bool IsAscii(TChar c) => CastChar(c) < StateMachineUtilities.AsciiCharacterCount; - public static DfaWithoutConflicts Create(Grammar grammar, int stateCount, int edgeCount, GrammarFileSection dfa, GrammarFileSection dfaDefaultTransitions) + [SetsRequiredMembers] + public DfaWithoutConflicts(Grammar grammar, int stateCount, int edgeCount, int tokenSymbolCount, GrammarFileSection dfa, GrammarFileSection dfaDefaultTransitions) + : base(grammar, stateCount, edgeCount, tokenSymbolCount, false) { int expectedSize = sizeof(uint) * 2 - + stateCount * sizeof(TEdge) + + stateCount * _edgeIndexSize + edgeCount * sizeof(TChar) * 2 - + edgeCount * sizeof(TState) - + stateCount * sizeof(TTokenSymbol); + + edgeCount * _stateIndexSize + + stateCount * _tokenSymbolIndexSize; if (dfa.Length != expectedSize) { @@ -50,28 +51,25 @@ public static DfaWithoutConflicts Create(Gra } int firstEdgeBase = dfa.Offset + sizeof(uint) * 2; - int rangeFromBase = firstEdgeBase + stateCount * sizeof(TEdge); + int rangeFromBase = firstEdgeBase + stateCount * _edgeIndexSize; int rangeToBase = rangeFromBase + edgeCount * sizeof(TChar); int edgeTargetBase = rangeToBase + edgeCount * sizeof(TChar); - int acceptBase = edgeTargetBase + edgeCount * sizeof(TState); + int acceptBase = 
edgeTargetBase + edgeCount * _stateIndexSize; if (dfaDefaultTransitions.Length > 0) { - if (dfaDefaultTransitions.Length != stateCount * sizeof(TState)) + if (dfaDefaultTransitions.Length != stateCount * _stateIndexSize) { ThrowHelpers.ThrowInvalidDfaDataSize(); } } - return new(grammar, stateCount, edgeCount) - { - FirstEdgeBase = firstEdgeBase, - RangeFromBase = rangeFromBase, - RangeToBase = rangeToBase, - EdgeTargetBase = edgeTargetBase, - DefaultTransitionBase = dfaDefaultTransitions.Offset, - AcceptBase = acceptBase - }; + FirstEdgeBase = firstEdgeBase; + RangeFromBase = rangeFromBase; + RangeToBase = rangeToBase; + EdgeTargetBase = edgeTargetBase; + DefaultTransitionBase = dfaDefaultTransitions.Offset; + AcceptBase = acceptBase; } internal override (int Offset, int Count) GetAcceptSymbolBounds(int state) @@ -88,13 +86,10 @@ internal override (int Offset, int Count) GetAcceptSymbolBounds(int state) internal override TokenSymbolHandle GetAcceptSymbolAt(int index) => GetAcceptSymbol(index); - private TokenSymbolHandle GetAcceptSymbolUnsafe(ReadOnlySpan grammarFile, int state) => - new(grammarFile.ReadUIntVariableSize(AcceptBase + state * sizeof(TTokenSymbol))); - private TokenSymbolHandle GetAcceptSymbol(int state) { ValidateStateIndex(state); - return GetAcceptSymbolUnsafe(Grammar.GrammarFile, state); + return ReadAcceptSymbol(Grammar.GrammarFile, state); } private int NextState(ReadOnlySpan grammarFile, int state, TChar c) @@ -127,13 +122,13 @@ private int NextState(ReadOnlySpan grammarFile, int state, TChar c) if (cFrom.CompareTo(c) <= 0 && c.CompareTo(cTo) <= 0) { - return ReadState(grammarFile, EdgeTargetBase + (edgeOffset + edge) * sizeof(TState)); + return ReadState(grammarFile, EdgeTargetBase, edgeOffset + edge); } } if (DefaultTransitionBase != 0) { - return ReadState(grammarFile, DefaultTransitionBase + state * sizeof(TState)); + return ReadState(grammarFile, DefaultTransitionBase, state); } return -1; @@ -155,7 +150,7 @@ internal override 
(TokenSymbolHandle AcceptSymbol, int CharactersRead, int Token { ignoreLeadingErrors = false; currentState = nextState; - if (GetAcceptSymbolUnsafe(grammarFile, currentState) is { HasValue: true } s) + if (ReadAcceptSymbol(grammarFile, currentState) is { HasValue: true } s) { acceptSymbol = s; acceptSymbolLength = i + 1; @@ -177,7 +172,7 @@ internal override (TokenSymbolHandle AcceptSymbol, int CharactersRead, int Token acceptSymbol = default; } - Return: + Return: if (acceptSymbol.HasValue) { return (acceptSymbol, acceptSymbolLength, currentState); @@ -196,7 +191,7 @@ internal override void ValidateContent(ReadOnlySpan grammarFile, in Gramma for (int state = 0; state < Count; state++) { - TokenSymbolHandle acceptSymbol = GetAcceptSymbolUnsafe(grammarFile, state); + TokenSymbolHandle acceptSymbol = ReadAcceptSymbol(grammarFile, state); if (acceptSymbol.HasValue) { grammarTables.ValidateHandle(acceptSymbol); diff --git a/src/FarkleNeo/Grammars/StateMachines/LrImplementationBase.cs b/src/FarkleNeo/Grammars/StateMachines/LrImplementationBase.cs index ab980cfb..76bb9835 100644 --- a/src/FarkleNeo/Grammars/StateMachines/LrImplementationBase.cs +++ b/src/FarkleNeo/Grammars/StateMachines/LrImplementationBase.cs @@ -8,10 +8,10 @@ namespace Farkle.Grammars.StateMachines; -internal unsafe abstract class LrImplementationBase : LrStateMachine - where TTokenSymbol : unmanaged, IComparable - where TNonterminal : unmanaged, IComparable +internal unsafe abstract class LrImplementationBase : LrStateMachine { + protected readonly byte _stateIndexSize, _actionIndexSize, _gotoIndexSize, _actionSize, _eofActionSize, _tokenSymbolIndexSize, _nonterminalIndexSize; + private Dictionary[]? 
_gotoLookup; protected int ActionCount { get; } @@ -24,17 +24,23 @@ internal unsafe abstract class LrImplementationBase grammarFile, int index) => - new(grammarFile.ReadIntVariableSize(ActionBase + index * sizeof(TAction))); + new(grammarFile.ReadIntVariableSize(ActionBase + index * _actionSize, _actionSize)); + + protected LrEndOfFileAction ReadEofAction(ReadOnlySpan grammarFile, int index) => + new(ReadUIntVariableSizeFromArray(grammarFile, EofActionBase, index, _eofActionSize)); protected int ReadFirstAction(ReadOnlySpan grammarFile, int state) => - (int)ReadUIntVariableSizeFromArray(grammarFile, FirstActionBase, state); + (int)ReadUIntVariableSizeFromArray(grammarFile, FirstActionBase, state, _actionIndexSize); protected int ReadFirstGoto(ReadOnlySpan grammarFile, int state) => - (int)ReadUIntVariableSizeFromArray(grammarFile, FirstGotoBase, state); + (int)ReadUIntVariableSizeFromArray(grammarFile, FirstGotoBase, state, _gotoIndexSize); protected int ReadGoto(ReadOnlySpan grammarFile, int index) => - (int)ReadUIntVariableSizeFromArray(grammarFile, GotoStateBase + index * sizeof(TStateIndex), 0); + (int)ReadUIntVariableSizeFromArray(grammarFile, GotoStateBase, index, _stateIndexSize); - protected static uint ReadUIntVariableSizeFromArray(ReadOnlySpan grammarFile, int @base, int index) => - grammarFile.ReadUIntVariableSize(@base + index * sizeof(T)); + protected static uint ReadUIntVariableSizeFromArray(ReadOnlySpan grammarFile, int @base, int index, byte indexSize) => + grammarFile.ReadUIntVariableSize(@base + index * indexSize, indexSize); internal sealed override Grammar Grammar { get; } @@ -93,7 +102,7 @@ internal sealed override int GetGoto(int state, NonterminalHandle nonterminal) protected KeyValuePair GetActionAtUnsafe(ReadOnlySpan grammarFile, int index) { - TokenSymbolHandle terminal = new(ReadUIntVariableSizeFromArray(grammarFile, ActionTerminalBase, index)); + TokenSymbolHandle terminal = new(ReadUIntVariableSizeFromArray(grammarFile, 
ActionTerminalBase, index, _tokenSymbolIndexSize)); LrAction action = ReadAction(grammarFile, index); return new(terminal, action); } @@ -107,7 +116,7 @@ protected KeyValuePair GetActionAtUnsafe(ReadOnlySp private KeyValuePair GetGotoAtUnsafe(ReadOnlySpan grammarFile, int index) { - NonterminalHandle nonterminal = new(ReadUIntVariableSizeFromArray(grammarFile, GotoNonterminalBase, index)); + NonterminalHandle nonterminal = new(ReadUIntVariableSizeFromArray(grammarFile, GotoNonterminalBase, index, _nonterminalIndexSize)); int state = ReadGoto(grammarFile, index); return new(nonterminal, state); } diff --git a/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs b/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs index 56e75cb5..cd0eb22e 100644 --- a/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs +++ b/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs @@ -1,41 +1,35 @@ // Copyright © Theodore Tsirpanis and Contributors. // SPDX-License-Identifier: MIT -using System.Diagnostics; using System.Diagnostics.CodeAnalysis; namespace Farkle.Grammars.StateMachines; -internal unsafe sealed class LrWithConflicts - : LrImplementationBase - where TTokenSymbol : unmanaged, IComparable - where TNonterminal : unmanaged, IComparable +internal unsafe sealed class LrWithConflicts : LrImplementationBase { + private readonly byte _eofActionIndexSize; + private readonly int _eofActionCount; internal required int FirstEofActionBase { get; init; } - internal required int EofActionBase { get; init; } - - public LrWithConflicts(Grammar grammar, int stateCount, int actionCount, int gotoCount, int eofActionCount) : base(grammar, stateCount, actionCount, gotoCount, true) + [SetsRequiredMembers] + public LrWithConflicts(Grammar grammar, int stateCount, int actionCount, int gotoCount, int eofActionCount, in GrammarTables grammarTables, GrammarFileSection lr) + : base(grammar, stateCount, actionCount, gotoCount, in grammarTables, true) { - 
Debug.Assert(GrammarUtilities.GetCompressedIndexSize(eofActionCount) == sizeof(TEofActionIndex)); - + _eofActionIndexSize = GrammarUtilities.GetCompressedIndexSize(eofActionCount); _eofActionCount = eofActionCount; - } - public static LrWithConflicts Create(Grammar grammar, int stateCount, int actionCount, int gotoCount, int eofActionCount, GrammarFileSection lr) - { int expectedSize = sizeof(uint) * 4 - + stateCount * sizeof(TActionIndex) - + actionCount * sizeof(TTokenSymbol) - + actionCount * sizeof(TAction) - + stateCount * sizeof(TEofActionIndex) - + eofActionCount * sizeof(TEofAction) - + stateCount * sizeof(TGotoIndex) - + gotoCount * sizeof(TNonterminal) - + gotoCount * sizeof(TStateIndex); + + stateCount * _actionIndexSize + + actionCount * _tokenSymbolIndexSize + + actionCount * _actionSize + + stateCount * _eofActionIndexSize + + eofActionCount * _eofActionSize + + stateCount * _gotoIndexSize + + gotoCount * _nonterminalIndexSize + + gotoCount * _stateIndexSize; if (lr.Length != expectedSize) { @@ -43,25 +37,22 @@ public static LrWithConflicts @@ -70,13 +61,8 @@ internal override LrAction GetAction(int state, TokenSymbolHandle terminal) => internal override LrEndOfFileAction GetEndOfFileAction(int state) => throw CreateHasConflictsException(); - private LrEndOfFileAction GetEndOfFileActionAtUnsafe(ReadOnlySpan grammarFile, int index) - { - return new(ReadUIntVariableSizeFromArray(grammarFile, EofActionBase, index)); - } - private int ReadFirstEofAction(ReadOnlySpan grammarFile, int state) => - (int)ReadUIntVariableSizeFromArray(grammarFile, FirstEofActionBase, state); + (int)ReadUIntVariableSizeFromArray(grammarFile, FirstEofActionBase, state, _eofActionIndexSize); internal override LrEndOfFileAction GetEndOfFileActionAt(int index) { @@ -84,7 +70,7 @@ internal override LrEndOfFileAction GetEndOfFileActionAt(int index) { ThrowHelpers.ThrowArgumentOutOfRangeException(nameof(index)); } - return GetEndOfFileActionAtUnsafe(Grammar.GrammarFile, index); + 
return ReadEofAction(Grammar.GrammarFile, index); } internal override (int Offset, int Count) GetEndOfFileActionBounds(int state) @@ -121,7 +107,7 @@ internal override void ValidateContent(ReadOnlySpan grammarFile, in Gramma for (int i = 0; i < _eofActionCount; i++) { - ValidateAction(GetEndOfFileActionAtUnsafe(grammarFile, i), in grammarTables); + ValidateAction(ReadEofAction(grammarFile, i), in grammarTables); } static void Assert([DoesNotReturnIf(false)] bool condition, string? message = null) diff --git a/src/FarkleNeo/Grammars/StateMachines/LrWithoutConflicts.cs b/src/FarkleNeo/Grammars/StateMachines/LrWithoutConflicts.cs index cbf719f7..64a89723 100644 --- a/src/FarkleNeo/Grammars/StateMachines/LrWithoutConflicts.cs +++ b/src/FarkleNeo/Grammars/StateMachines/LrWithoutConflicts.cs @@ -1,28 +1,27 @@ // Copyright © Theodore Tsirpanis and Contributors. // SPDX-License-Identifier: MIT +using System.Diagnostics.CodeAnalysis; + namespace Farkle.Grammars.StateMachines; -internal unsafe sealed class LrWithoutConflicts(Grammar grammar, int stateCount, int actionCount, int gotoCount) - : LrImplementationBase(grammar, stateCount, actionCount, gotoCount, false) - where TTokenSymbol : unmanaged, IComparable - where TNonterminal : unmanaged, IComparable +internal unsafe sealed class LrWithoutConflicts : LrImplementationBase { private Dictionary[]? 
_actionLookup; - internal required int EofActionBase { get; init; } - - public static LrWithoutConflicts Create(Grammar grammar, int stateCount, int actionCount, int gotoCount, GrammarFileSection lr) + [SetsRequiredMembers] + public LrWithoutConflicts(Grammar grammar, int stateCount, int actionCount, int gotoCount, in GrammarTables grammarTables, GrammarFileSection lr) + : base(grammar, stateCount, actionCount, gotoCount, in grammarTables, false) { int expectedSize = sizeof(uint) * 3 - + stateCount * sizeof(TActionIndex) - + actionCount * sizeof(TTokenSymbol) - + actionCount * sizeof(TAction) - + stateCount * sizeof(TEofAction) - + stateCount * sizeof(TGotoIndex) - + gotoCount * sizeof(TNonterminal) - + gotoCount * sizeof(TStateIndex); + + stateCount * _actionIndexSize + + actionCount * _tokenSymbolIndexSize + + actionCount * _actionSize + + stateCount * _eofActionSize + + stateCount * _gotoIndexSize + + gotoCount * _nonterminalIndexSize + + gotoCount * _stateIndexSize; if (lr.Length != expectedSize) { @@ -30,23 +29,20 @@ public static LrWithoutConflicts false; @@ -73,15 +69,10 @@ internal override void PrepareForParsing() internal override LrAction GetAction(int state, TokenSymbolHandle terminal) => _actionLookup![state].TryGetValue(terminal, out LrAction action) ? 
action : LrAction.Error; - private LrEndOfFileAction GetEndOfFileActionUnsafe(ReadOnlySpan grammarFile, int state) - { - return new(ReadUIntVariableSizeFromArray(grammarFile, EofActionBase, state)); - } - internal override LrEndOfFileAction GetEndOfFileAction(int state) { ValidateStateIndex(state); - return GetEndOfFileActionUnsafe(Grammar.GrammarFile, state); + return ReadEofAction(Grammar.GrammarFile, state); } internal override LrEndOfFileAction GetEndOfFileActionAt(int index) => GetEndOfFileAction(index); @@ -90,7 +81,7 @@ internal override (int Offset, int Count) GetEndOfFileActionBounds(int state) { ValidateStateIndex(state); - if (!GetEndOfFileActionUnsafe(Grammar.GrammarFile, state).IsError) + if (!ReadEofAction(Grammar.GrammarFile, state).IsError) { return (state, 1); } @@ -104,7 +95,7 @@ internal override void ValidateContent(ReadOnlySpan grammarFile, in Gramma for (int i = 0; i < Count; i++) { - ValidateAction(GetEndOfFileActionUnsafe(grammarFile, i), in grammarTables); + ValidateAction(ReadEofAction(grammarFile, i), in grammarTables); } } } diff --git a/src/FarkleNeo/Grammars/StateMachines/StateMachineUtilities.cs b/src/FarkleNeo/Grammars/StateMachines/StateMachineUtilities.cs index 5c2d916b..315e893b 100644 --- a/src/FarkleNeo/Grammars/StateMachines/StateMachineUtilities.cs +++ b/src/FarkleNeo/Grammars/StateMachines/StateMachineUtilities.cs @@ -113,29 +113,7 @@ public static unsafe int BufferBinarySearch(ReadOnlySpan buffer, int @b return null; } - return GetCompressedIndexSize(stateCount) switch - { - 1 => Stage1(), - 2 => Stage1(), - _ => Stage1() - }; - - Dfa Stage1() => GetCompressedIndexSize(edgeCount) switch - { - 1 => Stage2(), - 2 => Stage2(), - _ => Stage2() - }; - - Dfa Stage2() => GetCompressedIndexSize(grammar.GrammarTables.TokenSymbolRowCount) switch - { - 1 => Finish(), - 2 => Finish(), - _ => Finish() - }; - - Dfa Finish() => - DfaWithoutConflicts.Create(grammar, stateCount, edgeCount, dfa, dfaDefaultTransitions); + return new 
DfaWithoutConflicts(grammar, stateCount, edgeCount, grammar.GrammarTables.TokenSymbolRowCount, dfa, dfaDefaultTransitions); } private static Dfa? CreateDfaWithConflicts(Grammar grammar, ReadOnlySpan grammarFile, GrammarFileSection dfa, GrammarFileSection dfaDefaultTransitions) where TChar : unmanaged, IComparable @@ -154,36 +132,7 @@ Dfa Finish() => return null; } - return GetCompressedIndexSize(stateCount) switch - { - 1 => Stage1(), - 2 => Stage1(), - _ => Stage1() - }; - - Dfa Stage1() => GetCompressedIndexSize(edgeCount) switch - { - 1 => Stage2(), - 2 => Stage2(), - _ => Stage2() - }; - - Dfa Stage2() => GetCompressedIndexSize(grammar.GrammarTables.TokenSymbolRowCount) switch - { - 1 => Stage3(), - 2 => Stage3(), - _ => Stage3() - }; - - Dfa Stage3() => GetCompressedIndexSize(acceptCount) switch - { - 1 => Finish(), - 2 => Finish(), - _ => Finish() - }; - - Dfa Finish() => - DfaWithConflicts.Create(grammar, stateCount, edgeCount, acceptCount, dfa, dfaDefaultTransitions); + return new DfaWithConflicts(grammar, stateCount, edgeCount, acceptCount, grammar.GrammarTables.TokenSymbolRowCount, dfa, dfaDefaultTransitions); } private static LrStateMachine? 
CreateLr(Grammar grammar, ReadOnlySpan grammarFile, GrammarFileSection lr) @@ -202,60 +151,7 @@ Dfa Finish() => return null; } - return GetCompressedIndexSize(stateCount) switch - { - 1 => Stage1(), - 2 => Stage1(), - _ => Stage1() - }; - - LrStateMachine Stage1() => GetCompressedIndexSize(actionCount) switch - { - 1 => Stage2(), - 2 => Stage2(), - _ => Stage2() - }; - - LrStateMachine Stage2() => GetCompressedIndexSize(gotoCount) switch - { - 1 => Stage3(), - 2 => Stage3(), - _ => Stage3() - }; - - LrStateMachine Stage3() => GetLrActionEncodedSize(stateCount, grammar.GrammarTables.ProductionRowCount) switch - { - 1 => Stage4(), - 2 => Stage4(), - _ => Stage4() - }; - - LrStateMachine Stage4() => GetCompressedIndexSize(grammar.GrammarTables.ProductionRowCount) switch - { - 1 => Stage5(), - 2 => Stage5(), - _ => Stage5() - }; - - LrStateMachine Stage5() => GetCompressedIndexSize(grammar.GrammarTables.TokenSymbolRowCount) switch - { - 1 => Stage6(), - 2 => Stage6(), - _ => Stage6() - }; - - LrStateMachine Stage6() - where TTokenSymbol : unmanaged, IComparable => GetCompressedIndexSize(grammar.GrammarTables.NonterminalRowCount) switch - { - 1 => Finish(), - 2 => Finish(), - _ => Finish() - }; - - LrStateMachine Finish() - where TTokenSymbol : unmanaged, IComparable - where TNonterminal : unmanaged, IComparable => - LrWithoutConflicts.Create(grammar, stateCount, actionCount, gotoCount, lr); + return new LrWithoutConflicts(grammar, stateCount, actionCount, gotoCount, in grammar.GrammarTables, lr); } private static LrStateMachine? 
CreateLrWithConflicts(Grammar grammar, ReadOnlySpan grammarFile, GrammarFileSection lr) @@ -275,67 +171,7 @@ LrStateMachine Finish Stage1(), - 2 => Stage1(), - _ => Stage1() - }; - - LrStateMachine Stage1() => GetCompressedIndexSize(actionCount) switch - { - 1 => Stage2(), - 2 => Stage2(), - _ => Stage2() - }; - - LrStateMachine Stage2() => GetCompressedIndexSize(gotoCount) switch - { - 1 => Stage3(), - 2 => Stage3(), - _ => Stage3() - }; - - LrStateMachine Stage3() => GetCompressedIndexSize(eofActionCount) switch - { - 1 => Stage4(), - 2 => Stage4(), - _ => Stage4() - }; - - LrStateMachine Stage4() => GetLrActionEncodedSize(stateCount, grammar.GrammarTables.ProductionRowCount) switch - { - 1 => Stage5(), - 2 => Stage5(), - _ => Stage5() - }; - - LrStateMachine Stage5() => GetCompressedIndexSize(grammar.GrammarTables.ProductionRowCount) switch - { - 1 => Stage6(), - 2 => Stage6(), - _ => Stage6() - }; - - LrStateMachine Stage6() => GetCompressedIndexSize(grammar.GrammarTables.TokenSymbolRowCount) switch - { - 1 => Stage7(), - 2 => Stage7(), - _ => Stage7() - }; - - LrStateMachine Stage7() - where TTokenSymbol : unmanaged, IComparable => GetCompressedIndexSize(grammar.GrammarTables.NonterminalRowCount) switch - { - 1 => Finish(), - 2 => Finish(), - _ => Finish() - }; - - LrStateMachine Finish() - where TTokenSymbol : unmanaged, IComparable - where TNonterminal : unmanaged, IComparable => - LrWithConflicts.Create(grammar, stateCount, actionCount, gotoCount, eofActionCount, lr); + return new LrWithConflicts(grammar, stateCount, actionCount, gotoCount, eofActionCount, in grammar.GrammarTables, lr); } public static (Dfa? DfaOnChar, LrStateMachine? 
LrStateMachine) GetGrammarStateMachines(Grammar grammar, ReadOnlySpan grammarFile, in GrammarStateMachines stateMachines) From 241efc96d4a900748eec3b9facd0d057dd11fd6a Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Mon, 14 Oct 2024 03:24:00 +0300 Subject: [PATCH 2/6] Remove some unnecessary local variables and conditions. --- .../StateMachines/DfaWithConflicts.cs | 26 ++++++------------- .../StateMachines/DfaWithoutConflicts.cs | 25 +++++------------- .../Grammars/StateMachines/LrWithConflicts.cs | 25 ++++++------------ .../StateMachines/LrWithoutConflicts.cs | 22 +++++----------- 4 files changed, 30 insertions(+), 68 deletions(-) diff --git a/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs b/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs index 1d0abf75..9fc0d89a 100644 --- a/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs +++ b/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs @@ -34,28 +34,18 @@ public DfaWithConflicts(Grammar grammar, int stateCount, int edgeCount, int acce ThrowHelpers.ThrowInvalidDfaDataSize(); } - int firstEdgeBase = dfa.Offset + sizeof(uint) * 3; - int rangeFromBase = firstEdgeBase + stateCount * _edgeIndexSize; - int rangeToBase = rangeFromBase + edgeCount * sizeof(TChar); - int edgeTargetBase = rangeToBase + edgeCount * sizeof(TChar); - int firstAcceptBase = edgeTargetBase + edgeCount * _stateIndexSize; - int acceptBase = firstAcceptBase + stateCount * _acceptIndexSize; - - if (dfaDefaultTransitions.Length > 0) + if (dfaDefaultTransitions.Length > 0 && dfaDefaultTransitions.Length != stateCount * _stateIndexSize) { - if (dfaDefaultTransitions.Length != stateCount * _stateIndexSize) - { - ThrowHelpers.ThrowInvalidDfaDataSize(); - } + ThrowHelpers.ThrowInvalidDfaDataSize(); } - FirstEdgeBase = firstEdgeBase; - RangeFromBase = rangeFromBase; - RangeToBase = rangeToBase; - EdgeTargetBase = edgeTargetBase; + FirstEdgeBase = dfa.Offset + sizeof(uint) * 3; + RangeFromBase = FirstEdgeBase + stateCount * 
_edgeIndexSize; + RangeToBase = RangeFromBase + edgeCount * sizeof(TChar); + EdgeTargetBase = RangeToBase + edgeCount * sizeof(TChar); DefaultTransitionBase = dfaDefaultTransitions.Offset; - AcceptBase = acceptBase; - FirstAcceptBase = firstAcceptBase; + FirstAcceptBase = EdgeTargetBase + edgeCount * _stateIndexSize; + AcceptBase = FirstAcceptBase + stateCount * _acceptIndexSize; } private int ReadFirstAccept(ReadOnlySpan grammarFile, int state) => diff --git a/src/FarkleNeo/Grammars/StateMachines/DfaWithoutConflicts.cs b/src/FarkleNeo/Grammars/StateMachines/DfaWithoutConflicts.cs index b7c37d22..d8c36e1e 100644 --- a/src/FarkleNeo/Grammars/StateMachines/DfaWithoutConflicts.cs +++ b/src/FarkleNeo/Grammars/StateMachines/DfaWithoutConflicts.cs @@ -3,7 +3,6 @@ using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; -using Farkle.Buffers; namespace Farkle.Grammars.StateMachines; @@ -50,26 +49,16 @@ public DfaWithoutConflicts(Grammar grammar, int stateCount, int edgeCount, int t ThrowHelpers.ThrowInvalidDfaDataSize(); } - int firstEdgeBase = dfa.Offset + sizeof(uint) * 2; - int rangeFromBase = firstEdgeBase + stateCount * _edgeIndexSize; - int rangeToBase = rangeFromBase + edgeCount * sizeof(TChar); - int edgeTargetBase = rangeToBase + edgeCount * sizeof(TChar); - int acceptBase = edgeTargetBase + edgeCount * _stateIndexSize; - - if (dfaDefaultTransitions.Length > 0) + if (dfaDefaultTransitions.Length > 0 && dfaDefaultTransitions.Length != stateCount * _stateIndexSize) { - if (dfaDefaultTransitions.Length != stateCount * _stateIndexSize) - { - ThrowHelpers.ThrowInvalidDfaDataSize(); - } + ThrowHelpers.ThrowInvalidDfaDataSize(); } - FirstEdgeBase = firstEdgeBase; - RangeFromBase = rangeFromBase; - RangeToBase = rangeToBase; - EdgeTargetBase = edgeTargetBase; - DefaultTransitionBase = dfaDefaultTransitions.Offset; - AcceptBase = acceptBase; + FirstEdgeBase = dfa.Offset + sizeof(uint) * 2; + RangeFromBase = FirstEdgeBase + stateCount * _edgeIndexSize; 
+ RangeToBase = RangeFromBase + edgeCount * sizeof(TChar); + EdgeTargetBase = RangeToBase + edgeCount * sizeof(TChar); + AcceptBase = EdgeTargetBase + edgeCount * _stateIndexSize; } internal override (int Offset, int Count) GetAcceptSymbolBounds(int state) diff --git a/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs b/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs index cd0eb22e..baa7a5d1 100644 --- a/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs +++ b/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs @@ -36,23 +36,14 @@ public LrWithConflicts(Grammar grammar, int stateCount, int actionCount, int got ThrowHelpers.ThrowInvalidLrDataSize(); } - int firstActionBase = lr.Offset + sizeof(uint) * 4; - int actionTerminalBase = firstActionBase + stateCount * _actionIndexSize; - int actionBase = actionTerminalBase + actionCount * _tokenSymbolIndexSize; - int firstEofActionBase = actionBase + actionCount * _actionSize; - int eofActionBase = firstEofActionBase + stateCount * _eofActionIndexSize; - int firstGotoBase = eofActionBase + eofActionCount * _eofActionSize; - int gotoNonterminalBase = firstGotoBase + stateCount * _gotoIndexSize; - int gotoStateBase = gotoNonterminalBase + gotoCount * _nonterminalIndexSize; - - FirstActionBase = firstActionBase; - ActionTerminalBase = actionTerminalBase; - ActionBase = actionBase; - FirstEofActionBase = firstEofActionBase; - EofActionBase = eofActionBase; - FirstGotoBase = firstGotoBase; - GotoNonterminalBase = gotoNonterminalBase; - GotoStateBase = gotoStateBase; + FirstActionBase = lr.Offset + sizeof(uint) * 4; + ActionTerminalBase = FirstActionBase + stateCount * _actionIndexSize; + ActionBase = ActionTerminalBase + actionCount * _tokenSymbolIndexSize; + FirstEofActionBase = ActionBase + actionCount * _actionSize; + EofActionBase = FirstEofActionBase + stateCount * _eofActionIndexSize; + FirstGotoBase = EofActionBase + eofActionCount * _eofActionSize; + GotoNonterminalBase = FirstGotoBase + 
stateCount * _gotoIndexSize; + GotoStateBase = GotoNonterminalBase + gotoCount * _nonterminalIndexSize; } internal override LrAction GetAction(int state, TokenSymbolHandle terminal) => diff --git a/src/FarkleNeo/Grammars/StateMachines/LrWithoutConflicts.cs b/src/FarkleNeo/Grammars/StateMachines/LrWithoutConflicts.cs index 64a89723..d496497b 100644 --- a/src/FarkleNeo/Grammars/StateMachines/LrWithoutConflicts.cs +++ b/src/FarkleNeo/Grammars/StateMachines/LrWithoutConflicts.cs @@ -28,21 +28,13 @@ public LrWithoutConflicts(Grammar grammar, int stateCount, int actionCount, int ThrowHelpers.ThrowInvalidLrDataSize(); } - int firstActionBase = lr.Offset + sizeof(uint) * 3; - int actionTerminalBase = firstActionBase + stateCount * _actionIndexSize; - int actionBase = actionTerminalBase + actionCount * _tokenSymbolIndexSize; - int eofActionBase = actionBase + actionCount * _actionSize; - int firstGotoBase = eofActionBase + stateCount * _eofActionSize; - int gotoNonterminalBase = firstGotoBase + stateCount * _gotoIndexSize; - int gotoStateBase = gotoNonterminalBase + gotoCount * _nonterminalIndexSize; - - FirstActionBase = firstActionBase; - ActionTerminalBase = actionTerminalBase; - ActionBase = actionBase; - EofActionBase = eofActionBase; - FirstGotoBase = firstGotoBase; - GotoNonterminalBase = gotoNonterminalBase; - GotoStateBase = gotoStateBase; + FirstActionBase = lr.Offset + sizeof(uint) * 3; + ActionTerminalBase = FirstActionBase + stateCount * _actionIndexSize; + ActionBase = ActionTerminalBase + actionCount * _tokenSymbolIndexSize; + EofActionBase = ActionBase + actionCount * _actionSize; + FirstGotoBase = EofActionBase + stateCount * _eofActionSize; + GotoNonterminalBase = FirstGotoBase + stateCount * _gotoIndexSize; + GotoStateBase = GotoNonterminalBase + gotoCount * _nonterminalIndexSize; } internal override bool StateHasConflicts(int state) => false; From f39821bfcc512b37ab73448ca83dd6d331963252 Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Mon, 14 Oct 
2024 03:30:19 +0300 Subject: [PATCH 3/6] Store the binary logarithm of index sizes. This allows replacing multiplications in state machine lookups with left shifts. --- src/FarkleNeo/Buffers/BufferExtensions.cs | 48 ++++++------ src/FarkleNeo/Buffers/PowerOfTwo.cs | 27 +++++++ src/FarkleNeo/Grammars/GrammarTables.cs | 6 +- src/FarkleNeo/Grammars/GrammarUtilities.cs | 73 ++++++++++--------- .../StateMachines/DfaImplementationBase.cs | 2 +- .../StateMachines/DfaWithConflicts.cs | 2 +- .../StateMachines/LrImplementationBase.cs | 4 +- .../Grammars/StateMachines/LrWithConflicts.cs | 3 +- src/FarkleNeo/Grammars/Writers/DfaWriter.cs | 10 +-- .../Grammars/Writers/GrammarTablesWriter.cs | 18 ++--- src/FarkleNeo/Grammars/Writers/LrWriter.cs | 16 ++-- 11 files changed, 122 insertions(+), 87 deletions(-) create mode 100644 src/FarkleNeo/Buffers/PowerOfTwo.cs diff --git a/src/FarkleNeo/Buffers/BufferExtensions.cs b/src/FarkleNeo/Buffers/BufferExtensions.cs index 6a809ba8..3d5c9144 100644 --- a/src/FarkleNeo/Buffers/BufferExtensions.cs +++ b/src/FarkleNeo/Buffers/BufferExtensions.cs @@ -58,30 +58,30 @@ public static ulong ReadUInt64(this ReadOnlySpan buffer, int index) => BinaryPrimitives.ReadUInt64LittleEndian(buffer[index..]); // Signed integers of variable size must be read with this method to ensure they are sign-extended. 
- public static int ReadIntVariableSize(this ReadOnlySpan buffer, int index, byte dataSize) + public static int ReadIntVariableSize(this ReadOnlySpan buffer, int index, PowerOfTwo dataSize) { - switch (dataSize) + switch (dataSize.Log2) { - case 1: + case 0: return (sbyte)buffer[index]; - case 2: + case 1: return (short)buffer.ReadUInt16(index); - default: - Debug.Assert(dataSize == 4); + case var n: + Debug.Assert(n == 2); return buffer.ReadInt32(index); } } - public static uint ReadUIntVariableSize(this ReadOnlySpan buffer, int index, byte dataSize) + public static uint ReadUIntVariableSize(this ReadOnlySpan buffer, int index, PowerOfTwo dataSize) { - switch (dataSize) + switch (dataSize.Log2) { - case 1: + case 0: return buffer[index]; - case 2: + case 1: return buffer.ReadUInt16(index); - default: - Debug.Assert(dataSize == 4); + case var n: + Debug.Assert(n == 2); return buffer.ReadUInt32(index); } } @@ -147,39 +147,39 @@ public static void Write(this IBufferWriter buffer, ulong value) buffer.Advance(sizeof(ulong)); } - public static void WriteVariableSize(this IBufferWriter buffer, int value, byte dataSize) + public static void WriteVariableSize(this IBufferWriter buffer, int value, PowerOfTwo dataSize) { - switch (dataSize) + switch (dataSize.Log2) { - case 1: + case 0: Debug.Assert(value == (sbyte)value); buffer.Write((byte)(sbyte)value); break; - case 2: + case 1: Debug.Assert(value == (short)value); buffer.Write((ushort)(short)value); break; - default: - Debug.Assert(dataSize == 4); + case var n: + Debug.Assert(n == 2); buffer.Write(value); break; } } - public static void WriteVariableSize(this IBufferWriter buffer, uint value, byte dataSize) + public static void WriteVariableSize(this IBufferWriter buffer, uint value, PowerOfTwo dataSize) { - switch (dataSize) + switch (dataSize.Log2) { - case 1: + case 0: Debug.Assert(value <= byte.MaxValue); buffer.Write((byte)value); break; - case 2: + case 1: Debug.Assert(value <= ushort.MaxValue); 
buffer.Write((ushort)value); break; - default: - Debug.Assert(dataSize == 4); + case var n: + Debug.Assert(n == 2); buffer.Write(value); break; } diff --git a/src/FarkleNeo/Buffers/PowerOfTwo.cs b/src/FarkleNeo/Buffers/PowerOfTwo.cs new file mode 100644 index 00000000..006dadb2 --- /dev/null +++ b/src/FarkleNeo/Buffers/PowerOfTwo.cs @@ -0,0 +1,27 @@ +// Copyright © Theodore Tsirpanis and Contributors. +// SPDX-License-Identifier: MIT + +using System.Diagnostics; + +namespace Farkle.Buffers; + +/// +/// Represents a power of two and enables efficiently multiplying an integer by it. +/// +[DebuggerDisplay("{Value}")] +internal readonly struct PowerOfTwo +{ + public byte Log2 { get; private init; } + + public int Value => 1 << Log2; + + public static PowerOfTwo FromLog2(int valueLog2) + { + Debug.Assert(valueLog2 is >= 0 and <= 2); + return new() { Log2 = (byte)valueLog2 }; + } + + public static int operator *(int idx, PowerOfTwo dataSize) => idx << dataSize.Log2; + + public static implicit operator int(PowerOfTwo dataSize) => dataSize.Value; +} diff --git a/src/FarkleNeo/Grammars/GrammarTables.cs b/src/FarkleNeo/Grammars/GrammarTables.cs index 9787c54f..2c10b672 100644 --- a/src/FarkleNeo/Grammars/GrammarTables.cs +++ b/src/FarkleNeo/Grammars/GrammarTables.cs @@ -63,9 +63,9 @@ internal readonly struct GrammarTables private readonly GrammarHeapSizes _heapSizes; - public byte BlobHeapIndexSize => (byte)((_heapSizes & GrammarHeapSizes.BlobHeapSmall) != 0 ? 2 : 4); + public PowerOfTwo BlobHeapIndexSize => GetBlobHeapIndexSize(_heapSizes); - public byte StringHeapIndexSize => (byte)((_heapSizes & GrammarHeapSizes.StringHeapSmall) != 0 ? 
2 : 4); + public PowerOfTwo StringHeapIndexSize => GetStringHeapIndexSize(_heapSizes); public const int MaxRowCount = 0xFF_FFFF; // 2^24 - 1 @@ -124,7 +124,7 @@ private uint ReadProductionMemberHandle(ReadOnlySpan grammarFile, int inde private EntityHandle ReadSymbolHandle(ReadOnlySpan grammarFile, int index) { - byte indexSize = GetBinaryCodedIndexSize(TokenSymbolRowCount, NonterminalRowCount); + PowerOfTwo indexSize = GetBinaryCodedIndexSize(TokenSymbolRowCount, NonterminalRowCount); uint codedIndex = grammarFile.ReadUIntVariableSize(index, indexSize); // TableKind is byte-sized so the compiler optimizes away the array allocation on all frameworks. diff --git a/src/FarkleNeo/Grammars/GrammarUtilities.cs b/src/FarkleNeo/Grammars/GrammarUtilities.cs index e72f9eeb..22cda36f 100644 --- a/src/FarkleNeo/Grammars/GrammarUtilities.cs +++ b/src/FarkleNeo/Grammars/GrammarUtilities.cs @@ -1,41 +1,48 @@ // Copyright © Theodore Tsirpanis and Contributors. // SPDX-License-Identifier: MIT -namespace Farkle.Grammars +using Farkle.Buffers; + +namespace Farkle.Grammars; + +internal static class GrammarUtilities { - internal static class GrammarUtilities + /// + /// Gets the size in bytes of a compressed index to a collection of objects in a grammar. + /// + /// The number of objects in the collection. + public static PowerOfTwo GetCompressedIndexSize(int objectCount) => objectCount switch + { + < byte.MaxValue => PowerOfTwo.FromLog2(0), + < ushort.MaxValue => PowerOfTwo.FromLog2(1), + _ => PowerOfTwo.FromLog2(2) + }; + + /// + /// Gets the size in bytes of a coded index to two tables. + /// + public static PowerOfTwo GetBinaryCodedIndexSize(int row1Count, int row2Count) => (row1Count | row2Count) switch + { + <= sbyte.MaxValue => PowerOfTwo.FromLog2(0), + <= short.MaxValue => PowerOfTwo.FromLog2(1), + _ => PowerOfTwo.FromLog2(2) + }; + + /// + /// Gets the size in bytes of the encoded representation of an . + /// + /// The number of LR(0) states in the grammar. 
+ /// The number of productions in the grammar. + public static PowerOfTwo GetLrActionEncodedSize(int stateCount, int productionCount) => (stateCount, productionCount) switch { - /// - /// Gets the size in bytes of a compressed index to a collection of objects in a grammar. - /// - /// The number of objects in the collection. - public static byte GetCompressedIndexSize(int objectCount) => objectCount switch - { - < byte.MaxValue => sizeof(byte), - < ushort.MaxValue => sizeof(ushort), - _ => sizeof(uint) - }; + (<= sbyte.MaxValue - 1, <= -sbyte.MinValue) => PowerOfTwo.FromLog2(0), + (<= short.MaxValue - 1, <= -short.MinValue) => PowerOfTwo.FromLog2(1), + _ => PowerOfTwo.FromLog2(2) + }; - /// - /// Gets the size in bytes of a coded index to two tables. - /// - public static byte GetBinaryCodedIndexSize(int row1Count, int row2Count) => (row1Count | row2Count) switch - { - <= sbyte.MaxValue => sizeof(sbyte), - <= short.MaxValue => sizeof(short), - _ => sizeof(int) - }; + public static PowerOfTwo GetStringHeapIndexSize(GrammarHeapSizes heapSizes) => + PowerOfTwo.FromLog2((heapSizes & GrammarHeapSizes.StringHeapSmall) != 0 ? 1 : 2); - /// - /// Gets the size in bytes of the encoded representation of an . - /// - /// The number of LR(0) states in the grammar. - /// The number of productions in the grammar. - public static byte GetLrActionEncodedSize(int stateCount, int productionCount) => (stateCount, productionCount) switch - { - (<= sbyte.MaxValue - 1, <= -sbyte.MinValue) => sizeof(sbyte), - (<= short.MaxValue - 1, <= -short.MinValue) => sizeof(short), - _ => sizeof(int) - }; - } + public static PowerOfTwo GetBlobHeapIndexSize(GrammarHeapSizes heapSizes) => + PowerOfTwo.FromLog2((heapSizes & GrammarHeapSizes.BlobHeapSmall) != 0 ? 
1 : 2); } diff --git a/src/FarkleNeo/Grammars/StateMachines/DfaImplementationBase.cs b/src/FarkleNeo/Grammars/StateMachines/DfaImplementationBase.cs index 8ad54c6e..c4eade5a 100644 --- a/src/FarkleNeo/Grammars/StateMachines/DfaImplementationBase.cs +++ b/src/FarkleNeo/Grammars/StateMachines/DfaImplementationBase.cs @@ -9,7 +9,7 @@ namespace Farkle.Grammars.StateMachines; internal unsafe abstract class DfaImplementationBase : Dfa where TChar : unmanaged, IComparable { - protected readonly byte _stateIndexSize, _edgeIndexSize, _tokenSymbolIndexSize; + protected readonly PowerOfTwo _stateIndexSize, _edgeIndexSize, _tokenSymbolIndexSize; protected readonly int _edgeCount; diff --git a/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs b/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs index 9fc0d89a..f7a05493 100644 --- a/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs +++ b/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs @@ -8,7 +8,7 @@ namespace Farkle.Grammars.StateMachines; internal unsafe sealed class DfaWithConflicts : DfaImplementationBase where TChar : unmanaged, IComparable { - private readonly byte _acceptIndexSize; + private readonly PowerOfTwo _acceptIndexSize; private readonly int _acceptCount; diff --git a/src/FarkleNeo/Grammars/StateMachines/LrImplementationBase.cs b/src/FarkleNeo/Grammars/StateMachines/LrImplementationBase.cs index 76bb9835..5c1fcf9a 100644 --- a/src/FarkleNeo/Grammars/StateMachines/LrImplementationBase.cs +++ b/src/FarkleNeo/Grammars/StateMachines/LrImplementationBase.cs @@ -10,7 +10,7 @@ namespace Farkle.Grammars.StateMachines; internal unsafe abstract class LrImplementationBase : LrStateMachine { - protected readonly byte _stateIndexSize, _actionIndexSize, _gotoIndexSize, _actionSize, _eofActionSize, _tokenSymbolIndexSize, _nonterminalIndexSize; + protected readonly PowerOfTwo _stateIndexSize, _actionIndexSize, _gotoIndexSize, _actionSize, _eofActionSize, _tokenSymbolIndexSize, _nonterminalIndexSize; 
private Dictionary[]? _gotoLookup; @@ -62,7 +62,7 @@ protected int ReadFirstGoto(ReadOnlySpan grammarFile, int state) => protected int ReadGoto(ReadOnlySpan grammarFile, int index) => (int)ReadUIntVariableSizeFromArray(grammarFile, GotoStateBase, index, _stateIndexSize); - protected static uint ReadUIntVariableSizeFromArray(ReadOnlySpan grammarFile, int @base, int index, byte indexSize) => + protected static uint ReadUIntVariableSizeFromArray(ReadOnlySpan grammarFile, int @base, int index, PowerOfTwo indexSize) => grammarFile.ReadUIntVariableSize(@base + index * indexSize, indexSize); internal sealed override Grammar Grammar { get; } diff --git a/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs b/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs index baa7a5d1..37471d10 100644 --- a/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs +++ b/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs @@ -2,12 +2,13 @@ // SPDX-License-Identifier: MIT using System.Diagnostics.CodeAnalysis; +using Farkle.Buffers; namespace Farkle.Grammars.StateMachines; internal unsafe sealed class LrWithConflicts : LrImplementationBase { - private readonly byte _eofActionIndexSize; + private readonly PowerOfTwo _eofActionIndexSize; private readonly int _eofActionCount; diff --git a/src/FarkleNeo/Grammars/Writers/DfaWriter.cs b/src/FarkleNeo/Grammars/Writers/DfaWriter.cs index c3b4b2d4..62746b9a 100644 --- a/src/FarkleNeo/Grammars/Writers/DfaWriter.cs +++ b/src/FarkleNeo/Grammars/Writers/DfaWriter.cs @@ -176,9 +176,9 @@ public void WriteDfaData(IBufferWriter writer, int tokenSymbolCount) writer.Write(_accepts.Count); } - byte stateTargetSize = GetCompressedIndexSize(StateCount); - byte edgeIndexSize = GetCompressedIndexSize(_edges.Count); - byte tokenSymbolSize = GetCompressedIndexSize(tokenSymbolCount); + var stateTargetSize = GetCompressedIndexSize(StateCount); + var edgeIndexSize = GetCompressedIndexSize(_edges.Count); + var tokenSymbolSize = 
GetCompressedIndexSize(tokenSymbolCount); foreach (int firstEdge in _firstEdges) { @@ -199,7 +199,7 @@ public void WriteDfaData(IBufferWriter writer, int tokenSymbolCount) if (HasConflicts) { - byte acceptIndexSize = GetCompressedIndexSize(_accepts.Count); + var acceptIndexSize = GetCompressedIndexSize(_accepts.Count); foreach (int firstAccept in _firstAccepts) { writer.WriteVariableSize((uint)firstAccept, acceptIndexSize); @@ -230,7 +230,7 @@ public void WriteDefaultTransitions(IBufferWriter writer) ThrowHelpers.ThrowInvalidOperationException("DFA has no default transitions."); } - byte stateTargetSize = GetCompressedIndexSize(StateCount); + var stateTargetSize = GetCompressedIndexSize(StateCount); foreach (int state in _defaultTransitions) { writer.WriteVariableSize((uint)state, stateTargetSize); diff --git a/src/FarkleNeo/Grammars/Writers/GrammarTablesWriter.cs b/src/FarkleNeo/Grammars/Writers/GrammarTablesWriter.cs index 9664c773..b2ae025b 100644 --- a/src/FarkleNeo/Grammars/Writers/GrammarTablesWriter.cs +++ b/src/FarkleNeo/Grammars/Writers/GrammarTablesWriter.cs @@ -318,17 +318,17 @@ public readonly void WriteTo(IBufferWriter writer, GrammarHeapSizes heapSi int stateMachineRows = _stateMachines?.Count ?? 0; int specialNameRows = _specialNames?.Count ?? 0; - byte blobHeapIndexSize = (byte)((heapSizes & GrammarHeapSizes.BlobHeapSmall) != 0 ? 2 : 4); - byte stringHeapIndexSize = (byte)((heapSizes & GrammarHeapSizes.StringHeapSmall) != 0 ? 
2 : 4); + var blobHeapIndexSize = GetBlobHeapIndexSize(heapSizes); + var stringHeapIndexSize = GetStringHeapIndexSize(heapSizes); - byte tokenSymbolIndexSize = GetCompressedIndexSize(tokenSymbolRows); - byte groupIndexSize = GetCompressedIndexSize(groupRows); - byte groupNestingIndexSize = GetCompressedIndexSize(groupNestingRows); - byte nonterminalIndexSize = GetCompressedIndexSize(nonterminalRows); - byte productionIndexSize = GetCompressedIndexSize(productionRows); - byte productionMemberIndexSize = GetCompressedIndexSize(productionMemberRows); + var tokenSymbolIndexSize = GetCompressedIndexSize(tokenSymbolRows); + var groupIndexSize = GetCompressedIndexSize(groupRows); + var groupNestingIndexSize = GetCompressedIndexSize(groupNestingRows); + var nonterminalIndexSize = GetCompressedIndexSize(nonterminalRows); + var productionIndexSize = GetCompressedIndexSize(productionRows); + var productionMemberIndexSize = GetCompressedIndexSize(productionMemberRows); - byte symbolCodedIndexSize = GetBinaryCodedIndexSize(tokenSymbolRows, nonterminalRows); + var symbolCodedIndexSize = GetBinaryCodedIndexSize(tokenSymbolRows, nonterminalRows); TableKinds presentTables = PresentTables; int presentTableCount = BitOperationsCompat.PopCount((ulong)presentTables); diff --git a/src/FarkleNeo/Grammars/Writers/LrWriter.cs b/src/FarkleNeo/Grammars/Writers/LrWriter.cs index 41850841..8e47dc76 100644 --- a/src/FarkleNeo/Grammars/Writers/LrWriter.cs +++ b/src/FarkleNeo/Grammars/Writers/LrWriter.cs @@ -202,13 +202,13 @@ public void WriteData(IBufferWriter writer, int tokenSymbolCount, int term writer.Write(_eofActions.Count); } - byte stateIndexSize = GetCompressedIndexSize(StateCount); - byte actionIndexSize = GetCompressedIndexSize(_actions.Count); - byte actionSize = GetLrActionEncodedSize(StateCount, productionCount); - byte eofActionSize = GetCompressedIndexSize(productionCount); - byte gotoIndexSize = GetCompressedIndexSize(_gotos.Count); - byte nonterminalIndexSize = 
GetCompressedIndexSize(nonterminalCount); - byte tokenSymbolIndexSize = GetCompressedIndexSize(tokenSymbolCount); + var stateIndexSize = GetCompressedIndexSize(StateCount); + var actionIndexSize = GetCompressedIndexSize(_actions.Count); + var actionSize = GetLrActionEncodedSize(StateCount, productionCount); + var eofActionSize = GetCompressedIndexSize(productionCount); + var gotoIndexSize = GetCompressedIndexSize(_gotos.Count); + var nonterminalIndexSize = GetCompressedIndexSize(nonterminalCount); + var tokenSymbolIndexSize = GetCompressedIndexSize(tokenSymbolCount); foreach (int firstAction in _firstActions) { @@ -224,7 +224,7 @@ public void WriteData(IBufferWriter writer, int tokenSymbolCount, int term } if (HasConflicts) { - byte eofActionIndexSize = GetCompressedIndexSize(_eofActions.Count); + var eofActionIndexSize = GetCompressedIndexSize(_eofActions.Count); foreach (int firstEofAction in _firstEofActions) { writer.WriteVariableSize((uint)firstEofAction, eofActionIndexSize); From a25232cedf8f4b666dba06e6a2ce1cc2f37e8b1a Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Sun, 20 Oct 2024 18:27:04 +0300 Subject: [PATCH 4/6] Revert "Store the binary logarithm of index sizes." This reverts commit f39821bfcc512b37ab73448ca83dd6d331963252. Performance was not improved by that change. 
--- src/FarkleNeo/Buffers/BufferExtensions.cs | 48 ++++++------ src/FarkleNeo/Buffers/PowerOfTwo.cs | 27 ------- src/FarkleNeo/Grammars/GrammarTables.cs | 6 +- src/FarkleNeo/Grammars/GrammarUtilities.cs | 73 +++++++++---------- .../StateMachines/DfaImplementationBase.cs | 2 +- .../StateMachines/DfaWithConflicts.cs | 2 +- .../StateMachines/LrImplementationBase.cs | 4 +- .../Grammars/StateMachines/LrWithConflicts.cs | 3 +- src/FarkleNeo/Grammars/Writers/DfaWriter.cs | 10 +-- .../Grammars/Writers/GrammarTablesWriter.cs | 18 ++--- src/FarkleNeo/Grammars/Writers/LrWriter.cs | 16 ++-- 11 files changed, 87 insertions(+), 122 deletions(-) delete mode 100644 src/FarkleNeo/Buffers/PowerOfTwo.cs diff --git a/src/FarkleNeo/Buffers/BufferExtensions.cs b/src/FarkleNeo/Buffers/BufferExtensions.cs index 3d5c9144..6a809ba8 100644 --- a/src/FarkleNeo/Buffers/BufferExtensions.cs +++ b/src/FarkleNeo/Buffers/BufferExtensions.cs @@ -58,30 +58,30 @@ public static ulong ReadUInt64(this ReadOnlySpan buffer, int index) => BinaryPrimitives.ReadUInt64LittleEndian(buffer[index..]); // Signed integers of variable size must be read with this method to ensure they are sign-extended. 
- public static int ReadIntVariableSize(this ReadOnlySpan buffer, int index, PowerOfTwo dataSize) + public static int ReadIntVariableSize(this ReadOnlySpan buffer, int index, byte dataSize) { - switch (dataSize.Log2) + switch (dataSize) { - case 0: - return (sbyte)buffer[index]; case 1: + return (sbyte)buffer[index]; + case 2: return (short)buffer.ReadUInt16(index); - case var n: - Debug.Assert(n == 2); + default: + Debug.Assert(dataSize == 4); return buffer.ReadInt32(index); } } - public static uint ReadUIntVariableSize(this ReadOnlySpan buffer, int index, PowerOfTwo dataSize) + public static uint ReadUIntVariableSize(this ReadOnlySpan buffer, int index, byte dataSize) { - switch (dataSize.Log2) + switch (dataSize) { - case 0: - return buffer[index]; case 1: + return buffer[index]; + case 2: return buffer.ReadUInt16(index); - case var n: - Debug.Assert(n == 2); + default: + Debug.Assert(dataSize == 4); return buffer.ReadUInt32(index); } } @@ -147,39 +147,39 @@ public static void Write(this IBufferWriter buffer, ulong value) buffer.Advance(sizeof(ulong)); } - public static void WriteVariableSize(this IBufferWriter buffer, int value, PowerOfTwo dataSize) + public static void WriteVariableSize(this IBufferWriter buffer, int value, byte dataSize) { - switch (dataSize.Log2) + switch (dataSize) { - case 0: + case 1: Debug.Assert(value == (sbyte)value); buffer.Write((byte)(sbyte)value); break; - case 1: + case 2: Debug.Assert(value == (short)value); buffer.Write((ushort)(short)value); break; - case var n: - Debug.Assert(n == 2); + default: + Debug.Assert(dataSize == 4); buffer.Write(value); break; } } - public static void WriteVariableSize(this IBufferWriter buffer, uint value, PowerOfTwo dataSize) + public static void WriteVariableSize(this IBufferWriter buffer, uint value, byte dataSize) { - switch (dataSize.Log2) + switch (dataSize) { - case 0: + case 1: Debug.Assert(value <= byte.MaxValue); buffer.Write((byte)value); break; - case 1: + case 2: Debug.Assert(value <= 
ushort.MaxValue); buffer.Write((ushort)value); break; - case var n: - Debug.Assert(n == 2); + default: + Debug.Assert(dataSize == 4); buffer.Write(value); break; } diff --git a/src/FarkleNeo/Buffers/PowerOfTwo.cs b/src/FarkleNeo/Buffers/PowerOfTwo.cs deleted file mode 100644 index 006dadb2..00000000 --- a/src/FarkleNeo/Buffers/PowerOfTwo.cs +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright © Theodore Tsirpanis and Contributors. -// SPDX-License-Identifier: MIT - -using System.Diagnostics; - -namespace Farkle.Buffers; - -/// -/// Represents a power of two and enables efficiently multiplying an integer by it. -/// -[DebuggerDisplay("{Value}")] -internal readonly struct PowerOfTwo -{ - public byte Log2 { get; private init; } - - public int Value => 1 << Log2; - - public static PowerOfTwo FromLog2(int valueLog2) - { - Debug.Assert(valueLog2 is >= 0 and <= 2); - return new() { Log2 = (byte)valueLog2 }; - } - - public static int operator *(int idx, PowerOfTwo dataSize) => idx << dataSize.Log2; - - public static implicit operator int(PowerOfTwo dataSize) => dataSize.Value; -} diff --git a/src/FarkleNeo/Grammars/GrammarTables.cs b/src/FarkleNeo/Grammars/GrammarTables.cs index 2c10b672..9787c54f 100644 --- a/src/FarkleNeo/Grammars/GrammarTables.cs +++ b/src/FarkleNeo/Grammars/GrammarTables.cs @@ -63,9 +63,9 @@ internal readonly struct GrammarTables private readonly GrammarHeapSizes _heapSizes; - public PowerOfTwo BlobHeapIndexSize => GetBlobHeapIndexSize(_heapSizes); + public byte BlobHeapIndexSize => (byte)((_heapSizes & GrammarHeapSizes.BlobHeapSmall) != 0 ? 2 : 4); - public PowerOfTwo StringHeapIndexSize => GetStringHeapIndexSize(_heapSizes); + public byte StringHeapIndexSize => (byte)((_heapSizes & GrammarHeapSizes.StringHeapSmall) != 0 ? 
2 : 4); public const int MaxRowCount = 0xFF_FFFF; // 2^24 - 1 @@ -124,7 +124,7 @@ private uint ReadProductionMemberHandle(ReadOnlySpan grammarFile, int inde private EntityHandle ReadSymbolHandle(ReadOnlySpan grammarFile, int index) { - PowerOfTwo indexSize = GetBinaryCodedIndexSize(TokenSymbolRowCount, NonterminalRowCount); + byte indexSize = GetBinaryCodedIndexSize(TokenSymbolRowCount, NonterminalRowCount); uint codedIndex = grammarFile.ReadUIntVariableSize(index, indexSize); // TableKind is byte-sized so the compiler optimizes away the array allocation on all frameworks. diff --git a/src/FarkleNeo/Grammars/GrammarUtilities.cs b/src/FarkleNeo/Grammars/GrammarUtilities.cs index 22cda36f..e72f9eeb 100644 --- a/src/FarkleNeo/Grammars/GrammarUtilities.cs +++ b/src/FarkleNeo/Grammars/GrammarUtilities.cs @@ -1,48 +1,41 @@ // Copyright © Theodore Tsirpanis and Contributors. // SPDX-License-Identifier: MIT -using Farkle.Buffers; - -namespace Farkle.Grammars; - -internal static class GrammarUtilities +namespace Farkle.Grammars { - /// - /// Gets the size in bytes of a compressed index to a collection of objects in a grammar. - /// - /// The number of objects in the collection. - public static PowerOfTwo GetCompressedIndexSize(int objectCount) => objectCount switch - { - < byte.MaxValue => PowerOfTwo.FromLog2(0), - < ushort.MaxValue => PowerOfTwo.FromLog2(1), - _ => PowerOfTwo.FromLog2(2) - }; - - /// - /// Gets the size in bytes of a coded index to two tables. - /// - public static PowerOfTwo GetBinaryCodedIndexSize(int row1Count, int row2Count) => (row1Count | row2Count) switch - { - <= sbyte.MaxValue => PowerOfTwo.FromLog2(0), - <= short.MaxValue => PowerOfTwo.FromLog2(1), - _ => PowerOfTwo.FromLog2(2) - }; - - /// - /// Gets the size in bytes of the encoded representation of an . - /// - /// The number of LR(0) states in the grammar. - /// The number of productions in the grammar. 
- public static PowerOfTwo GetLrActionEncodedSize(int stateCount, int productionCount) => (stateCount, productionCount) switch + internal static class GrammarUtilities { - (<= sbyte.MaxValue - 1, <= -sbyte.MinValue) => PowerOfTwo.FromLog2(0), - (<= short.MaxValue - 1, <= -short.MinValue) => PowerOfTwo.FromLog2(1), - _ => PowerOfTwo.FromLog2(2) - }; + /// + /// Gets the size in bytes of a compressed index to a collection of objects in a grammar. + /// + /// The number of objects in the collection. + public static byte GetCompressedIndexSize(int objectCount) => objectCount switch + { + < byte.MaxValue => sizeof(byte), + < ushort.MaxValue => sizeof(ushort), + _ => sizeof(uint) + }; - public static PowerOfTwo GetStringHeapIndexSize(GrammarHeapSizes heapSizes) => - PowerOfTwo.FromLog2((heapSizes & GrammarHeapSizes.StringHeapSmall) != 0 ? 1 : 2); + /// + /// Gets the size in bytes of a coded index to two tables. + /// + public static byte GetBinaryCodedIndexSize(int row1Count, int row2Count) => (row1Count | row2Count) switch + { + <= sbyte.MaxValue => sizeof(sbyte), + <= short.MaxValue => sizeof(short), + _ => sizeof(int) + }; - public static PowerOfTwo GetBlobHeapIndexSize(GrammarHeapSizes heapSizes) => - PowerOfTwo.FromLog2((heapSizes & GrammarHeapSizes.BlobHeapSmall) != 0 ? 1 : 2); + /// + /// Gets the size in bytes of the encoded representation of an . + /// + /// The number of LR(0) states in the grammar. + /// The number of productions in the grammar. 
+ public static byte GetLrActionEncodedSize(int stateCount, int productionCount) => (stateCount, productionCount) switch + { + (<= sbyte.MaxValue - 1, <= -sbyte.MinValue) => sizeof(sbyte), + (<= short.MaxValue - 1, <= -short.MinValue) => sizeof(short), + _ => sizeof(int) + }; + } } diff --git a/src/FarkleNeo/Grammars/StateMachines/DfaImplementationBase.cs b/src/FarkleNeo/Grammars/StateMachines/DfaImplementationBase.cs index c4eade5a..8ad54c6e 100644 --- a/src/FarkleNeo/Grammars/StateMachines/DfaImplementationBase.cs +++ b/src/FarkleNeo/Grammars/StateMachines/DfaImplementationBase.cs @@ -9,7 +9,7 @@ namespace Farkle.Grammars.StateMachines; internal unsafe abstract class DfaImplementationBase : Dfa where TChar : unmanaged, IComparable { - protected readonly PowerOfTwo _stateIndexSize, _edgeIndexSize, _tokenSymbolIndexSize; + protected readonly byte _stateIndexSize, _edgeIndexSize, _tokenSymbolIndexSize; protected readonly int _edgeCount; diff --git a/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs b/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs index f7a05493..9fc0d89a 100644 --- a/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs +++ b/src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs @@ -8,7 +8,7 @@ namespace Farkle.Grammars.StateMachines; internal unsafe sealed class DfaWithConflicts : DfaImplementationBase where TChar : unmanaged, IComparable { - private readonly PowerOfTwo _acceptIndexSize; + private readonly byte _acceptIndexSize; private readonly int _acceptCount; diff --git a/src/FarkleNeo/Grammars/StateMachines/LrImplementationBase.cs b/src/FarkleNeo/Grammars/StateMachines/LrImplementationBase.cs index 5c1fcf9a..76bb9835 100644 --- a/src/FarkleNeo/Grammars/StateMachines/LrImplementationBase.cs +++ b/src/FarkleNeo/Grammars/StateMachines/LrImplementationBase.cs @@ -10,7 +10,7 @@ namespace Farkle.Grammars.StateMachines; internal unsafe abstract class LrImplementationBase : LrStateMachine { - protected readonly PowerOfTwo 
_stateIndexSize, _actionIndexSize, _gotoIndexSize, _actionSize, _eofActionSize, _tokenSymbolIndexSize, _nonterminalIndexSize; + protected readonly byte _stateIndexSize, _actionIndexSize, _gotoIndexSize, _actionSize, _eofActionSize, _tokenSymbolIndexSize, _nonterminalIndexSize; private Dictionary[]? _gotoLookup; @@ -62,7 +62,7 @@ protected int ReadFirstGoto(ReadOnlySpan grammarFile, int state) => protected int ReadGoto(ReadOnlySpan grammarFile, int index) => (int)ReadUIntVariableSizeFromArray(grammarFile, GotoStateBase, index, _stateIndexSize); - protected static uint ReadUIntVariableSizeFromArray(ReadOnlySpan grammarFile, int @base, int index, PowerOfTwo indexSize) => + protected static uint ReadUIntVariableSizeFromArray(ReadOnlySpan grammarFile, int @base, int index, byte indexSize) => grammarFile.ReadUIntVariableSize(@base + index * indexSize, indexSize); internal sealed override Grammar Grammar { get; } diff --git a/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs b/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs index 37471d10..baa7a5d1 100644 --- a/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs +++ b/src/FarkleNeo/Grammars/StateMachines/LrWithConflicts.cs @@ -2,13 +2,12 @@ // SPDX-License-Identifier: MIT using System.Diagnostics.CodeAnalysis; -using Farkle.Buffers; namespace Farkle.Grammars.StateMachines; internal unsafe sealed class LrWithConflicts : LrImplementationBase { - private readonly PowerOfTwo _eofActionIndexSize; + private readonly byte _eofActionIndexSize; private readonly int _eofActionCount; diff --git a/src/FarkleNeo/Grammars/Writers/DfaWriter.cs b/src/FarkleNeo/Grammars/Writers/DfaWriter.cs index 62746b9a..c3b4b2d4 100644 --- a/src/FarkleNeo/Grammars/Writers/DfaWriter.cs +++ b/src/FarkleNeo/Grammars/Writers/DfaWriter.cs @@ -176,9 +176,9 @@ public void WriteDfaData(IBufferWriter writer, int tokenSymbolCount) writer.Write(_accepts.Count); } - var stateTargetSize = GetCompressedIndexSize(StateCount); - var edgeIndexSize = 
GetCompressedIndexSize(_edges.Count); - var tokenSymbolSize = GetCompressedIndexSize(tokenSymbolCount); + byte stateTargetSize = GetCompressedIndexSize(StateCount); + byte edgeIndexSize = GetCompressedIndexSize(_edges.Count); + byte tokenSymbolSize = GetCompressedIndexSize(tokenSymbolCount); foreach (int firstEdge in _firstEdges) { @@ -199,7 +199,7 @@ public void WriteDfaData(IBufferWriter writer, int tokenSymbolCount) if (HasConflicts) { - var acceptIndexSize = GetCompressedIndexSize(_accepts.Count); + byte acceptIndexSize = GetCompressedIndexSize(_accepts.Count); foreach (int firstAccept in _firstAccepts) { writer.WriteVariableSize((uint)firstAccept, acceptIndexSize); @@ -230,7 +230,7 @@ public void WriteDefaultTransitions(IBufferWriter writer) ThrowHelpers.ThrowInvalidOperationException("DFA has no default transitions."); } - var stateTargetSize = GetCompressedIndexSize(StateCount); + byte stateTargetSize = GetCompressedIndexSize(StateCount); foreach (int state in _defaultTransitions) { writer.WriteVariableSize((uint)state, stateTargetSize); diff --git a/src/FarkleNeo/Grammars/Writers/GrammarTablesWriter.cs b/src/FarkleNeo/Grammars/Writers/GrammarTablesWriter.cs index b2ae025b..9664c773 100644 --- a/src/FarkleNeo/Grammars/Writers/GrammarTablesWriter.cs +++ b/src/FarkleNeo/Grammars/Writers/GrammarTablesWriter.cs @@ -318,17 +318,17 @@ public readonly void WriteTo(IBufferWriter writer, GrammarHeapSizes heapSi int stateMachineRows = _stateMachines?.Count ?? 0; int specialNameRows = _specialNames?.Count ?? 0; - var blobHeapIndexSize = GetBlobHeapIndexSize(heapSizes); - var stringHeapIndexSize = GetStringHeapIndexSize(heapSizes); + byte blobHeapIndexSize = (byte)((heapSizes & GrammarHeapSizes.BlobHeapSmall) != 0 ? 2 : 4); + byte stringHeapIndexSize = (byte)((heapSizes & GrammarHeapSizes.StringHeapSmall) != 0 ? 
2 : 4); - var tokenSymbolIndexSize = GetCompressedIndexSize(tokenSymbolRows); - var groupIndexSize = GetCompressedIndexSize(groupRows); - var groupNestingIndexSize = GetCompressedIndexSize(groupNestingRows); - var nonterminalIndexSize = GetCompressedIndexSize(nonterminalRows); - var productionIndexSize = GetCompressedIndexSize(productionRows); - var productionMemberIndexSize = GetCompressedIndexSize(productionMemberRows); + byte tokenSymbolIndexSize = GetCompressedIndexSize(tokenSymbolRows); + byte groupIndexSize = GetCompressedIndexSize(groupRows); + byte groupNestingIndexSize = GetCompressedIndexSize(groupNestingRows); + byte nonterminalIndexSize = GetCompressedIndexSize(nonterminalRows); + byte productionIndexSize = GetCompressedIndexSize(productionRows); + byte productionMemberIndexSize = GetCompressedIndexSize(productionMemberRows); - var symbolCodedIndexSize = GetBinaryCodedIndexSize(tokenSymbolRows, nonterminalRows); + byte symbolCodedIndexSize = GetBinaryCodedIndexSize(tokenSymbolRows, nonterminalRows); TableKinds presentTables = PresentTables; int presentTableCount = BitOperationsCompat.PopCount((ulong)presentTables); diff --git a/src/FarkleNeo/Grammars/Writers/LrWriter.cs b/src/FarkleNeo/Grammars/Writers/LrWriter.cs index 8e47dc76..41850841 100644 --- a/src/FarkleNeo/Grammars/Writers/LrWriter.cs +++ b/src/FarkleNeo/Grammars/Writers/LrWriter.cs @@ -202,13 +202,13 @@ public void WriteData(IBufferWriter writer, int tokenSymbolCount, int term writer.Write(_eofActions.Count); } - var stateIndexSize = GetCompressedIndexSize(StateCount); - var actionIndexSize = GetCompressedIndexSize(_actions.Count); - var actionSize = GetLrActionEncodedSize(StateCount, productionCount); - var eofActionSize = GetCompressedIndexSize(productionCount); - var gotoIndexSize = GetCompressedIndexSize(_gotos.Count); - var nonterminalIndexSize = GetCompressedIndexSize(nonterminalCount); - var tokenSymbolIndexSize = GetCompressedIndexSize(tokenSymbolCount); + byte stateIndexSize = 
GetCompressedIndexSize(StateCount); + byte actionIndexSize = GetCompressedIndexSize(_actions.Count); + byte actionSize = GetLrActionEncodedSize(StateCount, productionCount); + byte eofActionSize = GetCompressedIndexSize(productionCount); + byte gotoIndexSize = GetCompressedIndexSize(_gotos.Count); + byte nonterminalIndexSize = GetCompressedIndexSize(nonterminalCount); + byte tokenSymbolIndexSize = GetCompressedIndexSize(tokenSymbolCount); foreach (int firstAction in _firstActions) { @@ -224,7 +224,7 @@ public void WriteData(IBufferWriter writer, int tokenSymbolCount, int term } if (HasConflicts) { - var eofActionIndexSize = GetCompressedIndexSize(_eofActions.Count); + byte eofActionIndexSize = GetCompressedIndexSize(_eofActions.Count); foreach (int firstEofAction in _firstEofActions) { writer.WriteVariableSize((uint)firstEofAction, eofActionIndexSize); From 153fe11973102bea4e4c78580eef05140c1648d4 Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Mon, 21 Oct 2024 00:05:55 +0300 Subject: [PATCH 5/6] Fix missing assignment. 
--- src/FarkleNeo/Grammars/StateMachines/DfaWithoutConflicts.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/FarkleNeo/Grammars/StateMachines/DfaWithoutConflicts.cs b/src/FarkleNeo/Grammars/StateMachines/DfaWithoutConflicts.cs index d8c36e1e..571b8eb2 100644 --- a/src/FarkleNeo/Grammars/StateMachines/DfaWithoutConflicts.cs +++ b/src/FarkleNeo/Grammars/StateMachines/DfaWithoutConflicts.cs @@ -58,6 +58,7 @@ public DfaWithoutConflicts(Grammar grammar, int stateCount, int edgeCount, int t RangeFromBase = FirstEdgeBase + stateCount * _edgeIndexSize; RangeToBase = RangeFromBase + edgeCount * sizeof(TChar); EdgeTargetBase = RangeToBase + edgeCount * sizeof(TChar); + DefaultTransitionBase = dfaDefaultTransitions.Offset; AcceptBase = EdgeTargetBase + edgeCount * _stateIndexSize; } From 9ada5bb7d601cfba1906ddff13bca9ddd6de777b Mon Sep 17 00:00:00 2001 From: Theodore Tsirpanis Date: Mon, 21 Oct 2024 00:36:55 +0300 Subject: [PATCH 6/6] Fix SonarCloud warnings. --- .../Grammars/StateMachines/StateMachineUtilities.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/FarkleNeo/Grammars/StateMachines/StateMachineUtilities.cs b/src/FarkleNeo/Grammars/StateMachines/StateMachineUtilities.cs index 315e893b..c85bc51b 100644 --- a/src/FarkleNeo/Grammars/StateMachines/StateMachineUtilities.cs +++ b/src/FarkleNeo/Grammars/StateMachines/StateMachineUtilities.cs @@ -98,7 +98,7 @@ public static unsafe int BufferBinarySearch(ReadOnlySpan buffer, int @b return ~low; } - private static Dfa? CreateDfa(Grammar grammar, ReadOnlySpan grammarFile, GrammarFileSection dfa, GrammarFileSection dfaDefaultTransitions) where TChar : unmanaged, IComparable + private static DfaWithoutConflicts? 
CreateDfa(Grammar grammar, ReadOnlySpan grammarFile, GrammarFileSection dfa, GrammarFileSection dfaDefaultTransitions) where TChar : unmanaged, IComparable { if (dfa.Length < sizeof(uint) * 2) { @@ -116,7 +116,7 @@ public static unsafe int BufferBinarySearch(ReadOnlySpan buffer, int @b return new DfaWithoutConflicts(grammar, stateCount, edgeCount, grammar.GrammarTables.TokenSymbolRowCount, dfa, dfaDefaultTransitions); } - private static Dfa? CreateDfaWithConflicts(Grammar grammar, ReadOnlySpan grammarFile, GrammarFileSection dfa, GrammarFileSection dfaDefaultTransitions) where TChar : unmanaged, IComparable + private static DfaWithConflicts? CreateDfaWithConflicts(Grammar grammar, ReadOnlySpan grammarFile, GrammarFileSection dfa, GrammarFileSection dfaDefaultTransitions) where TChar : unmanaged, IComparable { if (dfa.Length < sizeof(uint) * 3) { @@ -135,7 +135,7 @@ public static unsafe int BufferBinarySearch(ReadOnlySpan buffer, int @b return new DfaWithConflicts(grammar, stateCount, edgeCount, acceptCount, grammar.GrammarTables.TokenSymbolRowCount, dfa, dfaDefaultTransitions); } - private static LrStateMachine? CreateLr(Grammar grammar, ReadOnlySpan grammarFile, GrammarFileSection lr) + private static LrWithoutConflicts? CreateLr(Grammar grammar, ReadOnlySpan grammarFile, GrammarFileSection lr) { if (lr.Length < sizeof(uint) * 3) { @@ -154,7 +154,7 @@ public static unsafe int BufferBinarySearch(ReadOnlySpan buffer, int @b return new LrWithoutConflicts(grammar, stateCount, actionCount, gotoCount, in grammar.GrammarTables, lr); } - private static LrStateMachine? CreateLrWithConflicts(Grammar grammar, ReadOnlySpan grammarFile, GrammarFileSection lr) + private static LrWithConflicts? CreateLrWithConflicts(Grammar grammar, ReadOnlySpan grammarFile, GrammarFileSection lr) { if (lr.Length < sizeof(uint) * 4) {