Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace generic state machine index types with dynamic dispatch et al. #305

Merged
merged 6 commits into from
Oct 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 9 additions & 31 deletions src/FarkleNeo/Buffers/BufferExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,22 +58,18 @@ public static ulong ReadUInt64(this ReadOnlySpan<byte> buffer, int index) =>
BinaryPrimitives.ReadUInt64LittleEndian(buffer[index..]);

/// <summary>
/// Reads a signed integer of variable width from <paramref name="buffer"/> and
/// sign-extends it to <see cref="int"/>.
/// </summary>
/// <param name="buffer">The buffer to read from.</param>
/// <param name="index">The offset in <paramref name="buffer"/> of the integer's first byte.</param>
/// <param name="dataSize">The width of the stored integer in bytes: 1, 2 or 4.</param>
/// <returns>The value that was read, widened to <see cref="int"/> with its sign preserved.</returns>
// Signed integers of variable size must be read with this method to ensure they are sign-extended.
public static int ReadIntVariableSize(this ReadOnlySpan<byte> buffer, int index, byte dataSize)
{
    switch (dataSize)
    {
        case 1:
            // Going through sbyte is what sign-extends the single byte.
            return (sbyte)buffer[index];
        case 2:
            // Reinterpret the 16 bits as signed before widening.
            return (short)buffer.ReadUInt16(index);
        default:
            // Every other width falls through to a four-byte read; the width is
            // only checked in builds where Debug.Assert is active.
            Debug.Assert(dataSize == 4);
            return buffer.ReadInt32(index);
    }
}

public static uint ReadUIntVariableSize(this ReadOnlySpan<byte> buffer, int index, byte dataSize)
Expand All @@ -90,24 +86,6 @@ public static uint ReadUIntVariableSize(this ReadOnlySpan<byte> buffer, int inde
}
}

/// <summary>
/// Reads an unsigned integer from <paramref name="buffer"/>, choosing the width from
/// the type argument <typeparamref name="T"/> instead of a run-time size.
/// </summary>
/// <typeparam name="T">
/// Selects the width: <see cref="byte"/> reads one byte, <see cref="ushort"/> reads via
/// <c>ReadUInt16</c> and <see cref="uint"/> reads via <c>ReadUInt32</c>.
/// </typeparam>
/// <param name="buffer">The buffer to read from.</param>
/// <param name="index">The offset in <paramref name="buffer"/> of the integer's first byte.</param>
/// <returns>The value that was read, widened to <see cref="uint"/>.</returns>
/// <exception cref="NotSupportedException"><typeparamref name="T"/> is none of the three types above.</exception>
// NOTE(review): this generic overload appears to be the one superseded by the
// ReadUIntVariableSize(buffer, index, dataSize) overload — confirm it has no remaining callers.
public static uint ReadUIntVariableSize<T>(this ReadOnlySpan<byte> buffer, int index)
{
if (typeof(T) == typeof(byte))
{
return buffer[index];
}
if (typeof(T) == typeof(ushort))
{
return buffer.ReadUInt16(index);
}
if (typeof(T) == typeof(uint))
{
return buffer.ReadUInt32(index);
}

// Reached only when T matched none of the supported types.
throw new NotSupportedException("Unsupported type.");
}

public static void WriteBlobLength(this IBufferWriter<byte> buffer, int value)
{
switch ((uint)value)
Expand Down
27 changes: 17 additions & 10 deletions src/FarkleNeo/Grammars/StateMachines/DfaImplementationBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
// SPDX-License-Identifier: MIT

using Farkle.Buffers;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;

namespace Farkle.Grammars.StateMachines;

internal unsafe abstract class DfaImplementationBase<TChar, TState, TEdge> : Dfa<TChar> where TChar : unmanaged, IComparable<TChar>
internal unsafe abstract class DfaImplementationBase<TChar> : Dfa<TChar> where TChar : unmanaged, IComparable<TChar>
{
protected readonly byte _stateIndexSize, _edgeIndexSize, _tokenSymbolIndexSize;

protected readonly int _edgeCount;

public required int FirstEdgeBase { get; init; }
Expand All @@ -22,26 +23,32 @@ internal unsafe abstract class DfaImplementationBase<TChar, TState, TEdge> : Dfa

public required int DefaultTransitionBase { get; init; }

protected DfaImplementationBase(Grammar grammar, int stateCount, int edgeCount, bool hasConflicts) : base(stateCount, hasConflicts)
public required int AcceptBase { get; init; }

protected DfaImplementationBase(Grammar grammar, int stateCount, int edgeCount, int tokenSymbolCount, bool hasConflicts) : base(stateCount, hasConflicts)
{
Debug.Assert(GrammarUtilities.GetCompressedIndexSize(stateCount) == sizeof(TState));
Debug.Assert(GrammarUtilities.GetCompressedIndexSize(edgeCount) == sizeof(TEdge));
_stateIndexSize = GrammarUtilities.GetCompressedIndexSize(stateCount);
_edgeIndexSize = GrammarUtilities.GetCompressedIndexSize(edgeCount);
_tokenSymbolIndexSize = GrammarUtilities.GetCompressedIndexSize(tokenSymbolCount);

Grammar = grammar;
_edgeCount = edgeCount;
}

/// <summary>
/// Reads the entry for <paramref name="state"/> from the table that starts at
/// <see cref="FirstEdgeBase"/>, whose entries are <c>_edgeIndexSize</c> bytes wide.
/// </summary>
/// <param name="grammarFile">The grammar file's bytes.</param>
/// <param name="state">The zero-based position of the entry to read.</param>
/// <returns>The stored unsigned value, converted to <see cref="int"/>.</returns>
protected int ReadFirstEdge(ReadOnlySpan<byte> grammarFile, int state) =>
    (int)grammarFile.ReadUIntVariableSize(FirstEdgeBase + state * _edgeIndexSize, _edgeIndexSize);

/// <summary>
/// Reads the <paramref name="index"/>-th entry of a table of state indices that starts at
/// <paramref name="base"/>, each entry being <c>_stateIndexSize</c> bytes wide.
/// </summary>
/// <param name="grammarFile">The grammar file's bytes.</param>
/// <param name="base">The offset in <paramref name="grammarFile"/> of the table's first entry.</param>
/// <param name="index">The zero-based position of the entry to read.</param>
/// <returns>The stored value minus one, so a stored zero yields <c>-1</c>.</returns>
protected int ReadState(ReadOnlySpan<byte> grammarFile, int @base, int index) =>
(int)grammarFile.ReadUIntVariableSize(@base + index * _stateIndexSize, _stateIndexSize) - 1;

protected static int ReadState(ReadOnlySpan<byte> grammarFile, int @base) =>
(int)grammarFile.ReadUIntVariableSize<TState>(@base) - 1;
/// <summary>
/// Reads the <paramref name="index"/>-th entry of the table that starts at
/// <see cref="AcceptBase"/>, whose entries are <c>_tokenSymbolIndexSize</c> bytes wide,
/// and wraps the raw value in a <see cref="TokenSymbolHandle"/>.
/// </summary>
/// <param name="grammarFile">The grammar file's bytes.</param>
/// <param name="index">The zero-based position of the entry to read; it is not range-checked here.</param>
protected TokenSymbolHandle ReadAcceptSymbol(ReadOnlySpan<byte> grammarFile, int index) =>
new(grammarFile.ReadUIntVariableSize(AcceptBase + index * _tokenSymbolIndexSize, _tokenSymbolIndexSize));

internal sealed override Grammar Grammar { get; }

/// <summary>
/// Reads the entry for <paramref name="state"/> from the table that starts at
/// <see cref="DefaultTransitionBase"/>. This method does not validate
/// <paramref name="state"/> itself.
/// </summary>
/// <param name="grammarFile">The grammar file's bytes.</param>
/// <param name="state">The zero-based position of the entry to read.</param>
/// <returns>The state index produced by <c>ReadState</c> for that entry.</returns>
private int GetDefaultTransitionUnsafe(ReadOnlySpan<byte> grammarFile, int state)
{
    // ReadState scales the index by the state index width, so only the table base is passed.
    return ReadState(grammarFile, DefaultTransitionBase, state);
}

private (int Offset, int Count) GetEdgeBoundsUnsafe(ReadOnlySpan<byte> grammarFile, int state)
Expand All @@ -55,7 +62,7 @@ private DfaEdge<TChar> GetEdgeAtUnsafe(ReadOnlySpan<byte> grammarFile, int index
{
TChar cFrom = StateMachineUtilities.Read<TChar>(grammarFile, RangeFromBase + index * sizeof(char));
TChar cTo = StateMachineUtilities.Read<TChar>(grammarFile, RangeToBase + index * sizeof(char));
int target = ReadState(grammarFile, EdgeTargetBase + index * sizeof(TState));
int target = ReadState(grammarFile, EdgeTargetBase, + index);

return new(cFrom, cTo, target);
}
Expand Down
65 changes: 23 additions & 42 deletions src/FarkleNeo/Grammars/StateMachines/DfaWithConflicts.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,70 +2,54 @@
// SPDX-License-Identifier: MIT

using Farkle.Buffers;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;

namespace Farkle.Grammars.StateMachines;

internal unsafe sealed class DfaWithConflicts<TChar, TState, TEdge, TTokenSymbol, TAccept> : DfaImplementationBase<TChar, TState, TEdge> where TChar : unmanaged, IComparable<TChar>
internal unsafe sealed class DfaWithConflicts<TChar> : DfaImplementationBase<TChar> where TChar : unmanaged, IComparable<TChar>
{
private readonly byte _acceptIndexSize;

private readonly int _acceptCount;

internal required int FirstAcceptBase { get; init; }

internal required int AcceptBase { get; init; }

/// <summary>
/// Creates a <see cref="DfaWithConflicts{TChar}"/> over the given grammar file sections,
/// checking the section sizes and computing the offset of every table.
/// </summary>
/// <param name="grammar">The grammar that contains the DFA.</param>
/// <param name="stateCount">The number of states.</param>
/// <param name="edgeCount">The number of edges.</param>
/// <param name="acceptCount">The number of entries in the accept table.</param>
/// <param name="tokenSymbolCount">The count that determines the width of each accept table entry.</param>
/// <param name="dfa">The grammar file section with the DFA's data.</param>
/// <param name="dfaDefaultTransitions">The grammar file section with the default transitions; its length may be zero.</param>
/// <remarks>
/// <c>ThrowHelpers.ThrowInvalidDfaDataSize</c> is called when either section's length
/// differs from the length computed from the counts.
/// </remarks>
[SetsRequiredMembers]
public DfaWithConflicts(Grammar grammar, int stateCount, int edgeCount, int acceptCount, int tokenSymbolCount, GrammarFileSection dfa, GrammarFileSection dfaDefaultTransitions)
    : base(grammar, stateCount, edgeCount, tokenSymbolCount, true)
{
    _acceptIndexSize = GrammarUtilities.GetCompressedIndexSize(acceptCount);
    _acceptCount = acceptCount;

    // Three 32-bit header values, followed by the tables in the order
    // in which their bases are assigned below.
    int expectedSize =
        sizeof(uint) * 3
        + stateCount * _edgeIndexSize
        + edgeCount * sizeof(TChar) * 2
        + edgeCount * _stateIndexSize
        + stateCount * _acceptIndexSize
        + acceptCount * _tokenSymbolIndexSize;

    if (dfa.Length != expectedSize)
    {
        ThrowHelpers.ThrowInvalidDfaDataSize();
    }

    // A zero-length default transitions section is accepted; otherwise it
    // must hold exactly one state index per state.
    if (dfaDefaultTransitions.Length > 0 && dfaDefaultTransitions.Length != stateCount * _stateIndexSize)
    {
        ThrowHelpers.ThrowInvalidDfaDataSize();
    }

    // Each table starts where the previous one ends.
    FirstEdgeBase = dfa.Offset + sizeof(uint) * 3;
    RangeFromBase = FirstEdgeBase + stateCount * _edgeIndexSize;
    RangeToBase = RangeFromBase + edgeCount * sizeof(TChar);
    EdgeTargetBase = RangeToBase + edgeCount * sizeof(TChar);
    DefaultTransitionBase = dfaDefaultTransitions.Offset;
    FirstAcceptBase = EdgeTargetBase + edgeCount * _stateIndexSize;
    AcceptBase = FirstAcceptBase + stateCount * _acceptIndexSize;
}

/// <summary>
/// Reads the entry for <paramref name="state"/> from the table that starts at
/// <see cref="FirstAcceptBase"/>, whose entries are <c>_acceptIndexSize</c> bytes wide.
/// </summary>
/// <param name="grammarFile">The grammar file's bytes.</param>
/// <param name="state">The zero-based position of the entry to read.</param>
/// <returns>The stored unsigned value, converted to <see cref="int"/>.</returns>
private int ReadFirstAccept(ReadOnlySpan<byte> grammarFile, int state) =>
    (int)grammarFile.ReadUIntVariableSize(FirstAcceptBase + state * _acceptIndexSize, _acceptIndexSize);

private (int Offset, int Count) GetAcceptSymbolBoundsUnsafe(ReadOnlySpan<byte> grammarFile, int state)
{
Expand All @@ -74,9 +58,6 @@ private int ReadFirstAccept(ReadOnlySpan<byte> grammarFile, int state) =>
return (firstAccept, nextFirstAccept - firstAccept);
}

private TokenSymbolHandle GetAcceptSymbolAtUnsafe(ReadOnlySpan<byte> grammarFile, int index) =>
new(grammarFile.ReadUIntVariableSize<TTokenSymbol>(AcceptBase + index * sizeof(TTokenSymbol)));

internal override (int Offset, int Count) GetAcceptSymbolBounds(int state)
{
ValidateStateIndex(state);
Expand All @@ -89,7 +70,7 @@ internal override TokenSymbolHandle GetAcceptSymbolAt(int index)
{
ThrowHelpers.ThrowArgumentOutOfRangeException(nameof(index));
}
return GetAcceptSymbolAtUnsafe(Grammar.GrammarFile, index);
return ReadAcceptSymbol(Grammar.GrammarFile, index);
}

internal override void ValidateContent(ReadOnlySpan<byte> grammarFile, in GrammarTables grammarTables)
Expand All @@ -111,7 +92,7 @@ internal override void ValidateContent(ReadOnlySpan<byte> grammarFile, in Gramma

for (int i = 0; i < _acceptCount; i++)
{
TokenSymbolHandle acceptSymbol = GetAcceptSymbolAtUnsafe(grammarFile, i);
TokenSymbolHandle acceptSymbol = ReadAcceptSymbol(grammarFile, i);
grammarTables.ValidateHandle(acceptSymbol);
}

Expand Down
59 changes: 22 additions & 37 deletions src/FarkleNeo/Grammars/StateMachines/DfaWithoutConflicts.cs
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
// Copyright © Theodore Tsirpanis and Contributors.
// SPDX-License-Identifier: MIT

using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using Farkle.Buffers;

namespace Farkle.Grammars.StateMachines;

internal unsafe sealed class DfaWithoutConflicts<TChar, TState, TEdge, TTokenSymbol>(Grammar grammar, int stateCount, int edgeCount) : DfaImplementationBase<TChar, TState, TEdge>(grammar, stateCount, edgeCount, false) where TChar : unmanaged, IComparable<TChar>
internal unsafe sealed class DfaWithoutConflicts<TChar> : DfaImplementationBase<TChar> where TChar : unmanaged, IComparable<TChar>
{
internal required int AcceptBase { get; init; }

/// <summary>
/// A lookup table with the next state for each ASCII character, for each starting state.
/// </summary>
Expand All @@ -35,43 +33,33 @@ private static char CastChar(TChar c)

private static bool IsAscii(TChar c) => CastChar(c) < StateMachineUtilities.AsciiCharacterCount;

/// <summary>
/// Creates a <see cref="DfaWithoutConflicts{TChar}"/> over the given grammar file sections,
/// checking the section sizes and computing the offset of every table.
/// </summary>
/// <param name="grammar">The grammar that contains the DFA.</param>
/// <param name="stateCount">The number of states.</param>
/// <param name="edgeCount">The number of edges.</param>
/// <param name="tokenSymbolCount">The count that determines the width of each accept table entry.</param>
/// <param name="dfa">The grammar file section with the DFA's data.</param>
/// <param name="dfaDefaultTransitions">The grammar file section with the default transitions; its length may be zero.</param>
/// <remarks>
/// <c>ThrowHelpers.ThrowInvalidDfaDataSize</c> is called when either section's length
/// differs from the length computed from the counts.
/// </remarks>
[SetsRequiredMembers]
public DfaWithoutConflicts(Grammar grammar, int stateCount, int edgeCount, int tokenSymbolCount, GrammarFileSection dfa, GrammarFileSection dfaDefaultTransitions)
    : base(grammar, stateCount, edgeCount, tokenSymbolCount, false)
{
    // Two 32-bit header values, followed by the tables in the order in which
    // their bases are assigned below. The accept table has one entry per state.
    int expectedSize =
        sizeof(uint) * 2
        + stateCount * _edgeIndexSize
        + edgeCount * sizeof(TChar) * 2
        + edgeCount * _stateIndexSize
        + stateCount * _tokenSymbolIndexSize;

    if (dfa.Length != expectedSize)
    {
        ThrowHelpers.ThrowInvalidDfaDataSize();
    }

    // A zero-length default transitions section is accepted; otherwise it
    // must hold exactly one state index per state.
    if (dfaDefaultTransitions.Length > 0 && dfaDefaultTransitions.Length != stateCount * _stateIndexSize)
    {
        ThrowHelpers.ThrowInvalidDfaDataSize();
    }

    // Each table starts where the previous one ends.
    FirstEdgeBase = dfa.Offset + sizeof(uint) * 2;
    RangeFromBase = FirstEdgeBase + stateCount * _edgeIndexSize;
    RangeToBase = RangeFromBase + edgeCount * sizeof(TChar);
    EdgeTargetBase = RangeToBase + edgeCount * sizeof(TChar);
    // NOTE(review): NextState skips default transitions when this base is zero,
    // which presumes an absent section reports an Offset of zero — confirm.
    DefaultTransitionBase = dfaDefaultTransitions.Offset;
    AcceptBase = EdgeTargetBase + edgeCount * _stateIndexSize;
}

internal override (int Offset, int Count) GetAcceptSymbolBounds(int state)
Expand All @@ -88,13 +76,10 @@ internal override (int Offset, int Count) GetAcceptSymbolBounds(int state)

internal override TokenSymbolHandle GetAcceptSymbolAt(int index) => GetAcceptSymbol(index);

private TokenSymbolHandle GetAcceptSymbolUnsafe(ReadOnlySpan<byte> grammarFile, int state) =>
new(grammarFile.ReadUIntVariableSize<TTokenSymbol>(AcceptBase + state * sizeof(TTokenSymbol)));

/// <summary>
/// Validates <paramref name="state"/> and then reads its entry from the accept table
/// of the grammar's file.
/// </summary>
/// <param name="state">The state whose accept table entry is read.</param>
/// <returns>The handle stored for <paramref name="state"/>.</returns>
private TokenSymbolHandle GetAcceptSymbol(int state)
{
    // Check the index first: ReadAcceptSymbol does no range checking of its own.
    ValidateStateIndex(state);
    return ReadAcceptSymbol(Grammar.GrammarFile, state);
}

private int NextState(ReadOnlySpan<byte> grammarFile, int state, TChar c)
Expand Down Expand Up @@ -127,13 +112,13 @@ private int NextState(ReadOnlySpan<byte> grammarFile, int state, TChar c)

if (cFrom.CompareTo(c) <= 0 && c.CompareTo(cTo) <= 0)
{
return ReadState(grammarFile, EdgeTargetBase + (edgeOffset + edge) * sizeof(TState));
return ReadState(grammarFile, EdgeTargetBase, edgeOffset + edge);
}
}

if (DefaultTransitionBase != 0)
{
return ReadState(grammarFile, DefaultTransitionBase + state * sizeof(TState));
return ReadState(grammarFile, DefaultTransitionBase, state);
}

return -1;
Expand All @@ -155,7 +140,7 @@ internal override (TokenSymbolHandle AcceptSymbol, int CharactersRead, int Token
{
ignoreLeadingErrors = false;
currentState = nextState;
if (GetAcceptSymbolUnsafe(grammarFile, currentState) is { HasValue: true } s)
if (ReadAcceptSymbol(grammarFile, currentState) is { HasValue: true } s)
{
acceptSymbol = s;
acceptSymbolLength = i + 1;
Expand All @@ -177,7 +162,7 @@ internal override (TokenSymbolHandle AcceptSymbol, int CharactersRead, int Token
acceptSymbol = default;
}

Return:
Return:
if (acceptSymbol.HasValue)
{
return (acceptSymbol, acceptSymbolLength, currentState);
Expand All @@ -196,7 +181,7 @@ internal override void ValidateContent(ReadOnlySpan<byte> grammarFile, in Gramma

for (int state = 0; state < Count; state++)
{
TokenSymbolHandle acceptSymbol = GetAcceptSymbolUnsafe(grammarFile, state);
TokenSymbolHandle acceptSymbol = ReadAcceptSymbol(grammarFile, state);
if (acceptSymbol.HasValue)
{
grammarTables.ValidateHandle(acceptSymbol);
Expand Down
Loading
Loading