Skip to content

Commit 8798153

Browse files
authored
Update REPL multiline logic and improve identifier parse error messages (#2824)
The REPL attempts to determine if a line continuation is needed to create a valid formula. Today, it does this by using the Power Fx parser and attempting to interpret the error messages, which turns out doesn't work so well. It also isn't very smart about operators and doesn't handle named formulas or UDFs. For example, these would not continue: - `IsMatch( "asdf", "(?x)` long regex best started on the next line - `NamedFormula =` long named formula best started on the next line - `Func( x: Number ): Text =` long udf, best started on the next line - `1 + ` long operand, best started on the next line The new logic is written by hand specifically to answer this continuation question and does a much better job. It is always a best guess and only determines when to send the formula to the parser/eval loop - annoyance either being too conservative or liberal is all that this controls. Even if a continuation is detected where there should not be one, the user can always enter a blank line to conclude the continuation. Not detecting a continuation will be annoying and should result an issue being raised, but the user can always remove the extra newline. Yes, we'll need to add logic in two places if the parser changes significantly, although that is not anticipated anytime soon. If this becomes a burden long term, we can look at enhancing the mainline parser to detect continuation situations, but at this point that feels like the REPL tail wagging the Power Fx dog. In the process of learning how identifiers were being parsed, I found the error messages were very poor for an identifier parser error. I didn't change the logic, just updated the messages and added some more tests.
1 parent 222fbe9 commit 8798153

File tree

6 files changed

+438
-43
lines changed

6 files changed

+438
-43
lines changed

src/libraries/Microsoft.PowerFx.Core/Localization/Strings.cs

+1
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,7 @@ internal static class TexlStrings
692692
public static ErrorResourceKey ErrCannotCoerce_SourceType_TargetType = new ErrorResourceKey("ErrCannotCoerce_SourceType_TargetType");
693693
public static ErrorResourceKey ErrNumberOrStringExpected = new ErrorResourceKey("ErrNumberOrStringExpected");
694694
public static ErrorResourceKey ErrClosingBracketExpected = new ErrorResourceKey("ErrClosingBracketExpected");
695+
public static ErrorResourceKey ErrClosingIdentifierExpected = new ErrorResourceKey("ErrClosingIdentifierExpected");
695696
public static ErrorResourceKey ErrEmptyInvalidIdentifier = new ErrorResourceKey("ErrEmptyInvalidIdentifier");
696697
public static ErrorResourceKey ErrIncompatibleTypes = new ErrorResourceKey("ErrIncompatibleTypes");
697698
public static ErrorResourceKey ErrIncompatibleTypesForEquality_Left_Right = new ErrorResourceKey("ErrIncompatibleTypesForEquality_Left_Right");

src/libraries/Microsoft.PowerFx.Core/Parser/TexlParser.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -1429,13 +1429,13 @@ private Identifier ParseIdentifier(Token at = null)
14291429
if (_curs.TidCur == TokKind.Ident)
14301430
{
14311431
tok = _curs.TokMove().As<IdentToken>();
1432-
if (tok.HasDelimiterStart && !tok.HasDelimiterEnd)
1432+
if (tok.IsModified)
14331433
{
1434-
PostError(tok, TexlStrings.ErrClosingBracketExpected);
1434+
PostError(tok, TexlStrings.ErrEmptyInvalidIdentifier);
14351435
}
1436-
else if (tok.IsModified)
1436+
else if (tok.HasDelimiterStart && !tok.HasDelimiterEnd)
14371437
{
1438-
PostError(tok, TexlStrings.ErrEmptyInvalidIdentifier);
1438+
PostError(tok, TexlStrings.ErrClosingIdentifierExpected);
14391439
}
14401440
}
14411441
else

src/libraries/Microsoft.PowerFx.Repl/Services/MultilineProcessor.cs

+242-24
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22
// Licensed under the MIT license.
33

44
using System;
5+
using System.Collections.Generic;
56
using System.Linq;
67
using System.Linq.Expressions;
78
using System.Text;
89
using System.Text.RegularExpressions;
910
using Microsoft.PowerFx.Core.Localization;
11+
using Microsoft.PowerFx.Core.Utils;
1012
using Microsoft.PowerFx.Repl.Functions;
1113
using Microsoft.PowerFx.Syntax;
1214
using Microsoft.PowerFx.Types;
@@ -23,37 +25,251 @@ public class MultilineProcessor
2325
// false if we're on a subsequent line.
2426
public bool IsFirstLine => _commandBuffer.Length == 0;
2527

26-
// Return null if we need more input.
27-
// else return string containing multiple lines together.
28-
public virtual string HandleLine(string line, ParserOptions parserOptions)
28+
// Recursively parses a formula to see if there are any open (, {, [, comments, strings, or if the formula ends with a unary prefix or binary operator
29+
// Recursion happens for string interoplation
30+
// An earlier version of this routine attempted to use the Power Fx parser directly, but interpreting erorr messages to determine continuation situations was not accurate enough
31+
private int ParseFormulaToClose(int bufferIndex, bool closeOnCurly, ref bool complete, ref bool error)
2932
{
30-
_commandBuffer.AppendLine(line);
33+
var brackets = new Stack<char>(); // stack of [, {, ( to ensure proper matching
34+
var close = false; // exit the loop as we've found the closing } if cloneOnCurly is true
35+
var leftOpen = false; // an identfier, string, or inline comment was left open when the end of the string was encountered
36+
var lastOperator = false; // was the last non-whitespace, non-comment character an operator?
37+
38+
if (closeOnCurly)
39+
{
40+
brackets.Push('}');
41+
}
42+
43+
for (; !close && !error && !leftOpen && bufferIndex < _commandBuffer.Length; bufferIndex++)
44+
{
45+
switch (_commandBuffer[bufferIndex])
46+
{
47+
// text string
48+
case '"':
49+
var stringInterpolation = bufferIndex > 0 && _commandBuffer[bufferIndex - 1] == '$';
50+
51+
for (bufferIndex++; bufferIndex < _commandBuffer.Length; bufferIndex++)
52+
{
53+
if (_commandBuffer[bufferIndex] == '"')
54+
{
55+
if (bufferIndex + 1 < _commandBuffer.Length && _commandBuffer[bufferIndex + 1] == '"')
56+
{
57+
// skip repeated quote
58+
bufferIndex++;
59+
}
60+
else
61+
{
62+
// end delimiter reached
63+
break;
64+
}
65+
}
66+
else if (stringInterpolation && _commandBuffer[bufferIndex] == '{')
67+
{
68+
if (bufferIndex + 1 < _commandBuffer.Length && _commandBuffer[bufferIndex + 1] == '{')
69+
{
70+
// skip repeated {
71+
bufferIndex++;
72+
}
73+
else
74+
{
75+
// recurse in for string interpolation island
76+
bufferIndex = ParseFormulaToClose(bufferIndex + 1, true, ref complete, ref error);
77+
}
78+
}
79+
}
80+
81+
// reached end of string before we found our ending delimiter
82+
if (bufferIndex == _commandBuffer.Length)
83+
{
84+
leftOpen = true;
85+
}
86+
87+
lastOperator = false;
88+
break;
89+
90+
// delimited identifier, which can't span lines but may have other characters within that should be ignored
91+
case '\'':
92+
for (bufferIndex++; bufferIndex < _commandBuffer.Length; bufferIndex++)
93+
{
94+
if (_commandBuffer[bufferIndex] == '\'')
95+
{
96+
if (bufferIndex + 1 < _commandBuffer.Length && _commandBuffer[bufferIndex + 1] == '\'')
97+
{
98+
// skip repeated quote
99+
bufferIndex++;
100+
}
101+
else
102+
{
103+
// end delimiter reached
104+
break;
105+
}
106+
}
107+
else if (CharacterUtils.IsTabulation(_commandBuffer[bufferIndex]) || CharacterUtils.IsLineTerm(_commandBuffer[bufferIndex]))
108+
{
109+
// invalid in identifier names
110+
error = true;
111+
break;
112+
}
113+
}
114+
115+
// reached end of string before we found our ending delimiter
116+
if (bufferIndex == _commandBuffer.Length)
117+
{
118+
leftOpen = true;
119+
}
120+
121+
lastOperator = false;
122+
break;
123+
124+
// comments or division operator
125+
case '/':
126+
if (bufferIndex + 1 < _commandBuffer.Length)
127+
{
128+
if (_commandBuffer[bufferIndex + 1] == '/')
129+
{
130+
for (bufferIndex += 2; bufferIndex < _commandBuffer.Length && _commandBuffer[bufferIndex] != '\n' && _commandBuffer[bufferIndex] != '\r'; bufferIndex++)
131+
{
132+
}
133+
134+
// no leftOpen, the comment is closed by the end of the buffer without an error
135+
}
136+
else if (_commandBuffer[bufferIndex + 1] == '*')
137+
{
138+
for (bufferIndex += 2; bufferIndex + 1 < _commandBuffer.Length && !(_commandBuffer[bufferIndex] == '*' && _commandBuffer[bufferIndex + 1] == '/'); bufferIndex++)
139+
{
140+
}
141+
142+
// reached end of comment before we found our ending delimiter
143+
if (bufferIndex + 1 >= _commandBuffer.Length)
144+
{
145+
leftOpen = true;
146+
}
147+
else
148+
{
149+
// skip past closing '/'
150+
bufferIndex++;
151+
}
152+
}
153+
else
154+
{
155+
// division operator
156+
lastOperator = true;
157+
}
158+
}
159+
else
160+
{
161+
// division operator at end of buffer
162+
lastOperator = true;
163+
}
164+
165+
// if it was indeed a comment and not division, lastOperator not updated, comments ignored
166+
break;
167+
168+
// table notation
169+
case '[':
170+
brackets.Push(']');
171+
lastOperator = false;
172+
break;
173+
174+
// function parameters and grouping notation
175+
case '(':
176+
brackets.Push(')');
177+
lastOperator = false;
178+
break;
179+
180+
// record notation
181+
case '{':
182+
brackets.Push('}');
183+
lastOperator = false;
184+
break;
185+
186+
// closing notation
187+
case ']':
188+
case ')':
189+
case '}':
190+
if (lastOperator || brackets.Count == 0 || _commandBuffer[bufferIndex] != brackets.Pop())
191+
{
192+
error = true;
193+
}
31194

32-
var commandBufferString = _commandBuffer.ToString();
195+
// if no error, brackets.Count has been decremented after brackets.Pop for this closing bracket in the first test
196+
else if (brackets.Count == 0 && _commandBuffer[bufferIndex] == '}' && closeOnCurly)
197+
{
198+
close = true;
199+
}
33200

34-
// We use the parser results to determine if the command is complete or more lines are needed.
35-
// The Engine features and parser options do not need to match what we will actually use,
36-
// this just needs to be good enough to detect the errors below for multiline processing.
37-
var result = Engine.Parse(commandBufferString, options: parserOptions);
201+
lastOperator = false;
202+
break;
38203

39-
// We will get this error, with this argument, if we are missing closing punctuation.
40-
var missingClose = result.Errors.Any(error => error.MessageKey == "ErrExpectedFound_Ex_Fnd" &&
41-
((TokKind)error.MessageArgs.Last() == TokKind.ParenClose ||
42-
(TokKind)error.MessageArgs.Last() == TokKind.CurlyClose ||
43-
(TokKind)error.MessageArgs.Last() == TokKind.BracketClose));
204+
// whitespace
205+
case ' ':
206+
case '\t':
207+
case '\n':
208+
case '\r':
209+
// lastOperator not updated, whitepace ignored
210+
break;
44211

45-
// However, we will get false positives from the above if the statement is very malformed.
46-
// For example: Mid("a", 2,) where the second error about ParenClose expected at the end is incorrect.
47-
// In this case, more characters will not help and we should complete the command and report the errors with what we have.
48-
var badToken = result.Errors.Any(error => error.MessageKey == "ErrBadToken");
212+
// binary and unary prefix operators that can't end a formula and may well be continued on the next line
213+
case '=':
214+
case '>':
215+
case '<':
216+
case ':':
217+
case '+':
218+
case '-':
219+
case '*': // division is handled above with comments
220+
case '^':
221+
case '&': // string concatenation and &&
222+
case '|': // ||
223+
case '!': // not and old property selector
224+
lastOperator = true;
225+
break;
226+
227+
// everything else
228+
default:
229+
lastOperator = false;
230+
break;
231+
}
232+
}
233+
234+
complete &= brackets.Count == 0 && !leftOpen && !lastOperator;
235+
236+
return bufferIndex;
237+
}
238+
239+
// Return null if we need more input.
240+
// else return string containing multiple lines together.
241+
public virtual string HandleLine(string line, ParserOptions parserOptions)
242+
{
243+
_commandBuffer.AppendLine(line);
244+
245+
var complete = true; // the buffer is complete, no longer continue, any open phrase will make this false
246+
var error = false; // an error was encountered, no longer continue, overriding a false complete
49247

50248
// An empty line (possibly with spaces) will also finish the command.
51-
// This is the ultimate escape hatch for the user if our closing detection logic above fails.
52-
var emptyLine = Regex.IsMatch(commandBufferString, @"\n[ \t]*\r?\n$", RegexOptions.Multiline);
249+
// This is the ultimate escape hatch if our closing detection logic above fails.
250+
var emptyLine = line.Trim() == string.Empty;
53251

54-
if (!missingClose || badToken || emptyLine)
252+
if (!emptyLine)
55253
{
56-
Clear();
254+
if (parserOptions.TextFirst)
255+
{
256+
for (int bufferIndex = 0; complete && bufferIndex >= 0 && bufferIndex < _commandBuffer.Length; bufferIndex++)
257+
{
258+
if ((bufferIndex == 0 || _commandBuffer[bufferIndex - 1] != '$') && _commandBuffer[bufferIndex] == '$' && bufferIndex + 1 < _commandBuffer.Length && _commandBuffer[bufferIndex + 1] == '{')
259+
{
260+
bufferIndex = ParseFormulaToClose(bufferIndex + 2, true, ref complete, ref error);
261+
}
262+
}
263+
}
264+
else
265+
{
266+
ParseFormulaToClose(0, false, ref complete, ref error);
267+
}
268+
}
269+
270+
if (complete || error || emptyLine)
271+
{
272+
string commandBufferString = _commandBuffer.ToString();
57273

58274
// Removes one newline from the end (\r\n or just \n) for the enter provided by the user.
59275
// Important for TextFirst lexer mode where a newline would be significant.
@@ -63,9 +279,11 @@ public virtual string HandleLine(string line, ParserOptions parserOptions)
63279
}
64280
else if (commandBufferString.EndsWith("\n", StringComparison.InvariantCulture))
65281
{
66-
commandBufferString = commandBufferString.Substring(0, commandBufferString.Length - 1);
282+
commandBufferString = commandBufferString.Substring(0, _commandBuffer.Length - 1);
67283
}
68-
284+
285+
Clear();
286+
69287
return commandBufferString;
70288
}
71289
else

src/strings/PowerFxResources.en-US.resx

+13-2
Original file line numberDiff line numberDiff line change
@@ -3705,12 +3705,23 @@
37053705
<value>Edit your formula so that it includes a bracket.</value>
37063706
<comment>1 How to fix the error. </comment>
37073707
</data>
3708+
<data name="ErrorResource_ErrClosingIdentifierExpected_ShortMessage" xml:space="preserve">
3709+
<value>Expected closing of identifier. We expect a closing (') at this point in the formula.</value>
3710+
<comment>Error Message.</comment>
3711+
</data>
3712+
<data name="ErrorResource_ErrClosingIdentifierExpected_LongMessage" xml:space="preserve">
3713+
<value>A closing single quote indicates the end of an identifier (for example, 'First Name').</value>
3714+
</data>
3715+
<data name="ErrorResource_ErrClosingIdentifierExpected_HowToFix_1" xml:space="preserve">
3716+
<value>Edit your formula so that it includes a closing single quote.</value>
3717+
<comment>1 How to fix the error. </comment>
3718+
</data>
37083719
<data name="ErrorResource_ErrEmptyInvalidIdentifier_ShortMessage" xml:space="preserve">
3709-
<value>The identifier has no valid text.</value>
3720+
<value>The identifier is empty or contains an invalid character, such as a tab or newline.</value>
37103721
<comment>Error Message.</comment>
37113722
</data>
37123723
<data name="ErrorResource_ErrEmptyInvalidIdentifier_HowToFix_1" xml:space="preserve">
3713-
<value>Ensure you have text for your identifier. This error occurs when the identifier is all blanks or spaces.</value>
3724+
<value>Ensure you have valid text in your your identifier. This error occurs when the identifier is blank, only has spaces, or contains a tab or newline.</value>
37143725
<comment>1 How to fix the error. </comment>
37153726
</data>
37163727
<data name="ErrUnOrderedTypeForComparison_Type" xml:space="preserve">

0 commit comments

Comments
 (0)