Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parser implementation #139

Merged
merged 20 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
c6c5584
Add classes representing parser messages and lexical errors.
teo-tsirpanis Aug 15, 2023
a7b98b6
Start writing the default tokenizer.
teo-tsirpanis Aug 16, 2023
ddfcc37
Add an optimized stack type.
teo-tsirpanis Aug 16, 2023
95037d7
Add internal grammar APIs related to groups.
teo-tsirpanis Aug 17, 2023
9d2a0a9
Support tokenizing groups.
teo-tsirpanis Aug 20, 2023
1be2ba8
Update documentation.
teo-tsirpanis Aug 18, 2023
4d768c6
Add the default parser implementation.
teo-tsirpanis Aug 21, 2023
50479b2
Format and localize syntax errors.
teo-tsirpanis Aug 21, 2023
0b34d83
Add APIs to create `CharParser`s.
teo-tsirpanis Aug 24, 2023
d13f5c4
Rename `ParserCommon` to `ParserUtilities`.
teo-tsirpanis Aug 24, 2023
bc3a169
Fix bugs and SonarCloud suggestions.
teo-tsirpanis Aug 27, 2023
0540aad
Do not discard the last token of a non-final block if the DFA cannot …
teo-tsirpanis Aug 27, 2023
4349edf
Wrap the result of `Tokenizer.Create` in a chain.
teo-tsirpanis Aug 27, 2023
4780068
Fix a nullability mismatch.
teo-tsirpanis Aug 27, 2023
7f0fbd6
Add some tests for the tokenizer.
teo-tsirpanis Aug 28, 2023
dd69b7b
Fix bugs and oversights in the grammar code.
teo-tsirpanis Aug 28, 2023
55b8450
Track in ParserState if suspending the tokenizer is supported.
teo-tsirpanis Aug 28, 2023
5983a99
Add a benchmark comparing parsing JSON in Farkle 6 and 7.
teo-tsirpanis Aug 29, 2023
3958c81
Fix a bug in `CharacterBufferManager`.
teo-tsirpanis Aug 29, 2023
112e674
Push the initial state to the LR parser stack.
teo-tsirpanis Aug 29, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions designs/7.0/parser-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,14 @@ public abstract class Tokenizer<TChar>

public abstract bool TryGetNextToken(ref ParserInputReader<TChar> input, ITokenSemanticProvider<TChar> semanticProvider, out TokenizerResult result);
}

public static class Tokenizer
{
// Creates a standalone tokenizer from a grammar. Char is the only supported character type.
// If the grammar cannot be used for tokenizing or the character type is unsupported,
// the method will throw.
public static Tokenizer<TChar> Create<TChar>(Grammar grammar);
}
```

It resembles the `Tokenizer` class of Farkle 6, with the following differences:
Expand Down
47 changes: 47 additions & 0 deletions performance/Farkle.Benchmarks.CSharp/JsonBenchmark.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright © Theodore Tsirpanis and Contributors.
// SPDX-License-Identifier: MIT

#nullable disable

extern alias farkle6;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;
using Farkle.Grammars;
using Farkle6 = farkle6::Farkle;

namespace Farkle.Benchmarks.CSharp;

/// <summary>
/// Benchmarks syntax-checking the same JSON documents with Farkle 6 and Farkle 7,
/// both from an in-memory string and from a stream of bytes.
/// </summary>
[MemoryDiagnoser]
[GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory)]
public class JsonBenchmark
{
    // Raw bytes of the input file; wrapped in a fresh stream by the streaming benchmarks.
    private byte[] _jsonBytes;

    // Text of the input file; used by the in-memory benchmarks.
    private string _jsonText;

    private Farkle6.RuntimeFarkle<object> _farkle6Runtime;

    private CharParser<object> _farkle7Parser;

    /// <summary>
    /// The name of the JSON file under the <c>resources</c> directory to parse.
    /// </summary>
    [Params("small.json", "medium.json", "big.json")]
    public string FileName { get; set; }

    /// <summary>
    /// Loads the input file and builds the Farkle 6 and Farkle 7 syntax checkers.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        string inputPath = $"resources/{FileName}";
        _jsonBytes = File.ReadAllBytes(inputPath);
        _jsonText = File.ReadAllText(inputPath);

        var farkle6Grammar = Farkle6.Grammar.EGT.ReadFromFile("resources/JSON.egt");
        _farkle6Runtime = Farkle6.RuntimeFarkle<object>.Create(farkle6Grammar, Farkle6.PostProcessors.SyntaxChecker);

        byte[] farkle7GrammarData = File.ReadAllBytes("resources/JSON.grammar.dat");
        _farkle7Parser = CharParser.CreateSyntaxChecker(Grammar.Create(farkle7GrammarData));
    }

    [Benchmark(Baseline = true)]
    [BenchmarkCategory("MemoryInput")]
    public object Farkle6String()
    {
        var parseResult = _farkle6Runtime.Parse(_jsonText);
        return parseResult.ResultValue;
    }

    [Benchmark]
    [BenchmarkCategory("MemoryInput")]
    public object Farkle7String()
    {
        var parseResult = _farkle7Parser.Parse(_jsonText);
        return parseResult.Value;
    }

    [Benchmark(Baseline = true)]
    [BenchmarkCategory("StreamingInput")]
    public object Farkle6Stream()
    {
        var reader = new StreamReader(new MemoryStream(_jsonBytes, false));
        return _farkle6Runtime.Parse(reader).ResultValue;
    }

    [Benchmark]
    [BenchmarkCategory("StreamingInput")]
    public object Farkle7Stream()
    {
        var reader = new StreamReader(new MemoryStream(_jsonBytes, false));
        return _farkle7Parser.Parse(reader).Value;
    }
}
70 changes: 70 additions & 0 deletions src/FarkleNeo/CharParser.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
// Copyright © Theodore Tsirpanis and Contributors.
// SPDX-License-Identifier: MIT

using Farkle.Diagnostics;
using Farkle.Grammars;
using Farkle.Parser;
using Farkle.Parser.Implementation;
using Farkle.Parser.Semantics;
using Farkle.Parser.Tokenizers;

Expand Down Expand Up @@ -193,3 +195,71 @@ public CharParser<T> WithTokenizer(ChainedTokenizerBuilder<char> builder)
return WithTokenizerCore(builder);
}
}

/// <summary>
/// Provides factory methods to create <see cref="CharParser{T}"/>s.
/// </summary>
public static class CharParser
{
    /// <summary>
    /// Creates a <see cref="CharParser{T}"/>.
    /// </summary>
    /// <typeparam name="T">The type of objects the parser will produce in case of success.</typeparam>
    /// <param name="grammar">The <see cref="Grammar"/> the parser will use.</param>
    /// <param name="semanticProvider">The <see cref="ISemanticProvider{TChar, T}"/> the parser will use.</param>
    /// <exception cref="ArgumentNullException"><paramref name="grammar"/> or <paramref name="semanticProvider"/>
    /// is <see langword="null"/>.</exception>
    public static CharParser<T> Create<T>(Grammar grammar, ISemanticProvider<char, T> semanticProvider)
    {
        ArgumentNullExceptionCompat.ThrowIfNull(grammar);
        ArgumentNullExceptionCompat.ThrowIfNull(semanticProvider);

        // Problems with the grammar do not throw here; they produce a parser
        // that is built around a diagnostic describing the problem.
        if (grammar.IsUnparsable(out string? errorKey))
        {
            return Fail(errorKey);
        }
        // The LR state machine must exist and be free of conflicts.
        if (grammar.LrStateMachine is not { HasConflicts: false } lrStateMachine)
        {
            return Fail(nameof(Resources.Parser_GrammarLrProblem));
        }

        // NOTE(review): with throwIfError set to false, tokenizer problems are presumably
        // reported when parsing instead of thrown here; confirm against Tokenizer.Create.
        Tokenizer<char> tokenizer = Tokenizer.Create<char>(grammar, throwIfError: false);
        return new DefaultParser<T>(grammar, lrStateMachine, semanticProvider, tokenizer);

        CharParser<T> Fail(string resourceKey) => new FailingCharParser<T>(new LocalizedDiagnostic(resourceKey), grammar);
    }

    /// <summary>
    /// Creates a <see cref="CharParser{T}"/> that does not perform any semantic analysis.
    /// </summary>
    /// <typeparam name="T">The type of objects the syntax checker will return in case of success.
    /// Must be a reference type and usually it is <see cref="object"/>
    /// or <see cref="T:Microsoft.FSharp.Core.Unit"/>.</typeparam>
    /// <param name="grammar">The <see cref="Grammar"/> the syntax checker will use.</param>
    /// <exception cref="ArgumentNullException"><paramref name="grammar"/> is <see langword="null"/>.</exception>
    /// <remarks>Syntax checkers always return <see langword="null"/> in case of success.</remarks>
    public static CharParser<T?> CreateSyntaxChecker<T>(Grammar grammar) where T : class =>
        Create(grammar, SyntaxChecker<char, T>.Instance);

    /// <summary>
    /// Creates a <see cref="CharParser{T}"/> that does not perform any semantic analysis.
    /// </summary>
    /// <param name="grammar">The <see cref="Grammar"/> the syntax checker will use.</param>
    /// <exception cref="ArgumentNullException"><paramref name="grammar"/> is <see langword="null"/>.</exception>
    /// <remarks>Syntax checkers always return <see langword="null"/> in case of success.</remarks>
    public static CharParser<object?> CreateSyntaxChecker(Grammar grammar) =>
        CreateSyntaxChecker<object>(grammar);

    /// <summary>
    /// Converts a <see cref="CharParser{T}"/> to a syntax checker with a user-defined return type.
    /// </summary>
    /// <typeparam name="T">The type of objects the original parser produces.</typeparam>
    /// <typeparam name="TNew">The type of objects the syntax checker will return in case of success.
    /// Must be a reference type.</typeparam>
    /// <param name="parser">The parser to convert.</param>
    /// <exception cref="ArgumentNullException"><paramref name="parser"/> is <see langword="null"/>.</exception>
    /// <seealso cref="CreateSyntaxChecker{T}(Grammar)"/>
    public static CharParser<TNew?> ToSyntaxChecker<T, TNew>(this CharParser<T> parser) where TNew : class
    {
        // Extension methods can be invoked on a null receiver; validate it like Create
        // validates its arguments instead of failing with a NullReferenceException.
        ArgumentNullExceptionCompat.ThrowIfNull(parser);
        return parser.WithSemanticProvider(SyntaxChecker<char, TNew>.Instance);
    }

    /// <summary>
    /// Converts a <see cref="CharParser{T}"/> to a syntax checker.
    /// </summary>
    /// <typeparam name="T">The type of objects the original parser produces.</typeparam>
    /// <param name="parser">The parser to convert.</param>
    /// <exception cref="ArgumentNullException"><paramref name="parser"/> is <see langword="null"/>.</exception>
    /// <seealso cref="CreateSyntaxChecker(Grammar)"/>
    public static CharParser<object?> ToSyntaxChecker<T>(this CharParser<T> parser) =>
        parser.ToSyntaxChecker<T, object>();
}
189 changes: 189 additions & 0 deletions src/FarkleNeo/Collections/ValueStack.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
// Copyright © Theodore Tsirpanis and Contributors.
// SPDX-License-Identifier: MIT

using System.Buffers;
using System.Diagnostics;
#if NETCOREAPP || NETSTANDARD2_1_OR_GREATER
using System.Runtime.CompilerServices;
#endif

namespace Farkle.Collections;

/// <summary>
/// A stack type that can store its items in stack-allocated memory.
/// </summary>
/// <remarks>
/// Items are kept either in a span provided by the caller or in an array rented from
/// the shared <see cref="ArrayPool{T}"/>. <see cref="Dispose"/> returns a rented array to the pool.
/// </remarks>
[DebuggerDisplay("Count = {Count}")]
internal ref struct ValueStack<T>
{
    // Backing storage; the live items are the first _count elements.
    private Span<T> _items;
    // The rented array behind _items, or null when _items is caller-provided memory.
    private T[]? _pooledArray;
    private int _count;

    // Capacity used by Grow when the current storage is empty.
    private const int InitialCapacity = 4;

    // Whether vacated slots should be overwritten with default values.
    private static bool ShouldResetItems =>
#if NETCOREAPP || NETSTANDARD2_1_OR_GREATER
        RuntimeHelpers.IsReferenceOrContainsReferences<T>();
#else
        // On .NET Standard 2.0 it might return false positives but that's fine.
        // We will use this value only for optimizations.
        !typeof(T).IsPrimitive;
#endif

    /// <summary>
    /// Creates an empty stack backed by an array rented from the shared pool.
    /// </summary>
    /// <param name="initialCapacity">The minimum capacity to rent. Must not be negative.</param>
    public ValueStack(int initialCapacity)
    {
        ArgumentOutOfRangeExceptionCompat.ThrowIfNegative(initialCapacity);
        _items = _pooledArray = ArrayPool<T>.Shared.Rent(initialCapacity);
        _count = 0;
    }

    /// <summary>
    /// Creates an empty stack that initially stores its items in <paramref name="items"/>.
    /// </summary>
    /// <param name="items">The memory to use until the stack outgrows it.</param>
    public ValueStack(Span<T> items)
    {
        _items = items;
        _pooledArray = null;
        _count = 0;
    }

    /// <summary>
    /// Creates a stack from a previously exported <see cref="State"/>,
    /// reusing its array and item count.
    /// </summary>
    /// <param name="state">The state to restore from.</param>
    public ValueStack(State state)
    {
        _items = _pooledArray = state.Items;
        _count = state.Count;
    }

    // Moves the items to a rented array of double the capacity
    // (or InitialCapacity if the current storage is empty).
    private void Grow()
    {
        int newCapacity = _items.Length switch
        {
            0 => InitialCapacity,
            var length => length * 2
        };
        T[] newArray = ArrayPool<T>.Shared.Rent(newCapacity);
        _items.CopyTo(newArray);
        if (_pooledArray is not null)
        {
            // Do not leave item references behind in an array that goes back to the pool.
            if (ShouldResetItems)
            {
                _pooledArray.AsSpan().Clear();
            }
            ArrayPool<T>.Shared.Return(_pooledArray);
        }
        _items = _pooledArray = newArray;
    }

    /// <summary>
    /// The number of items in the stack.
    /// </summary>
    public readonly int Count => _count;

    /// <summary>
    /// All items of the stack, ordered from the bottom to the top.
    /// </summary>
    [DebuggerBrowsable(DebuggerBrowsableState.RootHidden)]
    public readonly Span<T> AllItems => _items[.._count];

    /// <summary>
    /// Pushes <paramref name="item"/> to the top of the stack, growing the storage if it is full.
    /// </summary>
    public void Push(T item)
    {
        if (_count == _items.Length)
        {
            Grow();
        }
        _items[_count++] = item;
    }

    /// <summary>
    /// Removes and returns the item at the top of the stack.
    /// </summary>
    /// <remarks>
    /// An empty stack is reported through <c>ThrowHelpers.ThrowInvalidOperationException</c>.
    /// </remarks>
    public T Pop()
    {
        if (_count == 0)
        {
            ThrowHelpers.ThrowInvalidOperationException();
        }

        ref T resultRef = ref _items[_count - 1];
        _count--;
        T result = resultRef;
        if (ShouldResetItems)
        {
            resultRef = default!;
        }
        return result;
    }

    /// <summary>
    /// Removes the top <paramref name="itemsToPop"/> items from the stack.
    /// </summary>
    /// <param name="itemsToPop">The number of items to remove.
    /// Must be between zero and <see cref="Count"/>.</param>
    public void PopMany(int itemsToPop)
    {
        // The unsigned comparison rejects negative values as well.
        if ((uint)itemsToPop > (uint)_count)
        {
            ThrowHelpers.ThrowArgumentOutOfRangeException(nameof(itemsToPop));
        }

        if (ShouldResetItems)
        {
            // Clear the slots being vacated, which are the top itemsToPop live items.
            // Indexing from the end of _items would instead clear the unused tail of the
            // storage and leave the popped references behind.
            _items.Slice(_count - itemsToPop, itemsToPop).Clear();
        }
        _count -= itemsToPop;
    }

    /// <summary>
    /// Removes all items from the stack. The storage is kept.
    /// </summary>
    public void Clear()
    {
        if (_count == 0)
        {
            return;
        }
        if (ShouldResetItems)
        {
            _items[.._count].Clear();
        }
        _count = 0;
    }

    /// <summary>
    /// Clears the stack and returns its rented array, if any, to the shared pool.
    /// </summary>
    public void Dispose()
    {
        Clear();
        if (_pooledArray is not null)
        {
            ArrayPool<T>.Shared.Return(_pooledArray);
            _pooledArray = null;
        }
    }

    /// <summary>
    /// Returns an item of the stack without removing it.
    /// </summary>
    /// <param name="indexFromTheEnd">The distance of the item from the top of the stack.
    /// Zero, the default, is the top item. Must be less than <see cref="Count"/>.</param>
    public readonly T Peek(int indexFromTheEnd = 0)
    {
        // The unsigned comparison rejects negative values as well.
        if ((uint)indexFromTheEnd >= (uint)_count)
        {
            ThrowHelpers.ThrowArgumentOutOfRangeException(nameof(indexFromTheEnd));
        }
        return _items[_count - 1 - indexFromTheEnd];
    }

    /// <summary>
    /// Returns the top <paramref name="itemsToPeek"/> items of the stack without removing them,
    /// ordered from the bottom-most to the top.
    /// </summary>
    /// <param name="itemsToPeek">The number of items to return.
    /// Must be between zero and <see cref="Count"/>.</param>
    public readonly Span<T> PeekMany(int itemsToPeek)
    {
        if ((uint)itemsToPeek > (uint)_count)
        {
            ThrowHelpers.ThrowArgumentOutOfRangeException(nameof(itemsToPeek));
        }
        return _items.Slice(_count - itemsToPeek, itemsToPeek);
    }

    /// <summary>
    /// Captures the stack's array and item count in a <see cref="State"/> that can be stored
    /// on the heap and later passed to <see cref="ValueStack{T}(State)"/>.
    /// </summary>
    /// <remarks>
    /// If the items are not in a rented array yet, they are first copied to one.
    /// </remarks>
    public State ExportState()
    {
        if (_pooledArray is null)
        {
            _pooledArray = ArrayPool<T>.Shared.Rent(_items.Length);
            _items.CopyTo(_pooledArray);
            _items = _pooledArray;
        }
        // NOTE(review): the returned state shares the array with this stack, and both
        // Dispose methods return it to the pool; presumably callers dispose only one of them.
        return new State(_pooledArray, _count);
    }

    /// <summary>
    /// The array and item count of a <see cref="ValueStack{T}"/>.
    /// Disposing it returns the array to the shared pool.
    /// </summary>
    public readonly struct State : IDisposable
    {
        public readonly T[] Items;
        public readonly int Count;

        internal State(T[] items, int count)
        {
            Items = items;
            Count = count;
        }

        public void Dispose()
        {
            // Items is null on a default State.
            if (Items is not null)
            {
                ArrayPool<T>.Shared.Return(Items);
            }
        }
    }
}
49 changes: 49 additions & 0 deletions src/FarkleNeo/Diagnostics/DelimitedString.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright © Theodore Tsirpanis and Contributors.
// SPDX-License-Identifier: MIT

using System.Collections.Immutable;
using System.Text;

namespace Farkle.Diagnostics;

/// <summary>
/// Formats a sequence of strings separated by a delimiter.
/// The formatting happens when <see cref="ToString"/> is called.
/// </summary>
internal readonly struct DelimitedString
{
    private readonly ImmutableArray<string?> _values;
    private readonly string _delimiter;
    private readonly string _fallback;
    private readonly Func<string, string> _valueTransform;

    /// <summary>
    /// Creates a <see cref="DelimitedString"/>.
    /// </summary>
    /// <param name="values">The values to format.</param>
    /// <param name="delimiter">The text to put between consecutive values.</param>
    /// <param name="fallback">The text to use in place of values
    /// that are <see langword="null"/>.</param>
    /// <param name="valueTransform">An optional function applied to each non-null value
    /// before it is written. If omitted, values are written as they are.</param>
    public DelimitedString(ImmutableArray<string?> values, string delimiter, string fallback, Func<string, string>? valueTransform = null)
    {
        _values = values;
        _delimiter = delimiter;
        _fallback = fallback;
        _valueTransform = valueTransform ?? (x => x);
    }

    /// <summary>
    /// Returns the formatted values, or the empty string if there are none.
    /// </summary>
    public override string ToString()
    {
        // A default ImmutableArray (which is also what default(DelimitedString) holds)
        // has no backing array and throws when its length is read, so it is treated
        // like an empty sequence.
        if (_values.IsDefaultOrEmpty)
        {
            return string.Empty;
        }

        // Single values need no delimiter and no StringBuilder.
        switch (_values)
        {
            case [null]: return _fallback;
            case [var x]: return _valueTransform(x);
        }

        StringBuilder sb = new();
        bool first = true;
        foreach (string? value in _values)
        {
            if (first)
            {
                first = false;
            }
            else
            {
                sb.Append(_delimiter);
            }
            sb.Append(value is null ? _fallback : _valueTransform(value));
        }
        return sb.ToString();
    }
}
Loading
Loading