From 7fefe7fb745cadc629d85be5ff379ad065647933 Mon Sep 17 00:00:00 2001 From: Som Snytt Date: Thu, 20 Mar 2025 17:45:15 -0700 Subject: [PATCH 1/3] Trivial refactor for reading --- .../dotty/tools/dotc/parsing/Parsers.scala | 6 ++-- .../dotty/tools/dotc/parsing/Scanners.scala | 20 +++++------- .../src/dotty/tools/repl/JLineTerminal.scala | 24 +++++++------- .../src/dotty/tools/repl/ParseResult.scala | 32 +++++++++---------- .../dotty/tools/repl/ReplCompilerTests.scala | 7 ++++ 5 files changed, 45 insertions(+), 44 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index c24dbce1b6ac..8818eb0ca2b7 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -1365,8 +1365,10 @@ object Parsers { def literal(negOffset: Int = in.offset, inPattern: Boolean = false, inTypeOrSingleton: Boolean = false, inStringInterpolation: Boolean = false): Tree = { def literalOf(token: Token): Tree = { val isNegated = negOffset < in.offset - def digits0 = in.removeNumberSeparators(in.strVal) - def digits = if (isNegated) "-" + digits0 else digits0 + def digits = + val s = in.strVal + val digits0 = if s.indexOf('_') == -1 then s else s.replace("_", "") + if isNegated then "-" + digits0 else digits0 if !inTypeOrSingleton then token match { case INTLIT => return Number(digits, NumberKind.Whole(in.base)) diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index f7050cec41fd..2f4f7a400861 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -163,9 +163,7 @@ object Scanners { strVal = litBuf.toString litBuf.clear() - @inline def isNumberSeparator(c: Char): Boolean = c == '_' - - @inline def removeNumberSeparators(s: String): String = if (s.indexOf('_') == -1) s else s.replace("_", "") + inline def 
isNumberSeparator(c: Char): Boolean = c == '_' // disallow trailing numeric separator char, but continue lexing def checkNoTrailingSeparator(): Unit = @@ -307,7 +305,7 @@ object Scanners { println(s"\nSTART SKIP AT ${sourcePos().line + 1}, $this in $currentRegion") var noProgress = 0 // Defensive measure to ensure we always get out of the following while loop - // even if source file is weirly formatted (i.e. we never reach EOF) + // even if source file is weirdly formatted (i.e. we never reach EOF) var prevOffset = offset while !atStop && noProgress < 3 do nextToken() @@ -789,20 +787,18 @@ object Scanners { then return true false - /** Is there a blank line between the current token and the last one? - * A blank line consists only of characters <= ' '. - * @pre afterLineEnd(). + /** Is there a blank line between the last token and the current one? + * A blank line is a sequence of only characters <= ' ', between two LFs (or FFs). */ - private def pastBlankLine: Boolean = { + private def pastBlankLine: Boolean = val end = offset def recur(idx: Offset, isBlank: Boolean): Boolean = idx < end && { val ch = buf(idx) - if (ch == LF || ch == FF) isBlank || recur(idx + 1, true) - else recur(idx + 1, isBlank && ch <= ' ') + if ch == LF || ch == FF then isBlank || recur(idx + 1, isBlank = true) + else recur(idx + 1, isBlank = isBlank && ch <= ' ') } - recur(lastOffset, false) - } + recur(lastOffset, isBlank = false) import Character.{isHighSurrogate, isLowSurrogate, isUnicodeIdentifierPart, isUnicodeIdentifierStart, isValidCodePoint, toCodePoint} diff --git a/compiler/src/dotty/tools/repl/JLineTerminal.scala b/compiler/src/dotty/tools/repl/JLineTerminal.scala index e4ac1626525e..902cb9291157 100644 --- a/compiler/src/dotty/tools/repl/JLineTerminal.scala +++ b/compiler/src/dotty/tools/repl/JLineTerminal.scala @@ -109,13 +109,12 @@ class JLineTerminal extends java.io.Closeable { def words = java.util.Collections.emptyList[String] } - def parse(input: String, cursor: Int, 
context: ParseContext): reader.ParsedLine = { - def parsedLine(word: String, wordCursor: Int) = - new ParsedLine(cursor, input, word, wordCursor) + def parse(input: String, cursor: Int, context: ParseContext): reader.ParsedLine = + def parsedLine(word: String, wordCursor: Int) = ParsedLine(cursor, input, word, wordCursor) // Used when no word is being completed def defaultParsedLine = parsedLine("", 0) - def incomplete(): Nothing = throw new EOFError( + def incomplete(): Nothing = throw EOFError( // Using dummy values, not sure what they are used for /* line = */ -1, /* column = */ -1, @@ -123,12 +122,13 @@ class JLineTerminal extends java.io.Closeable { /* missing = */ newLinePrompt) case class TokenData(token: Token, start: Int, end: Int) - def currentToken: TokenData /* | Null */ = { + + def currentToken: TokenData /* | Null */ = val source = SourceFile.virtual("", input) val scanner = new Scanner(source)(using ctx.fresh.setReporter(Reporter.NoReporter)) var lastBacktickErrorStart: Option[Int] = None - while (scanner.token != EOF) { + while scanner.token != EOF do val start = scanner.offset val token = scanner.token scanner.nextToken() @@ -138,15 +138,13 @@ class JLineTerminal extends java.io.Closeable { if (isCurrentToken) return TokenData(token, lastBacktickErrorStart.getOrElse(start), end) - // we need to enclose the last backtick, which unclosed produces ERROR token if (token == ERROR && input(start) == '`') then lastBacktickErrorStart = Some(start) else lastBacktickErrorStart = None - } null - } + end currentToken def acceptLine = { val onLastLine = !input.substring(cursor).contains(System.lineSeparator) @@ -162,9 +160,9 @@ class JLineTerminal extends java.io.Closeable { // complete we need to ensure that the : isn't split into // 2 tokens, but rather the entire thing is treated as the "word", in // order to insure the : is replaced in the completion. 
- case ParseContext.COMPLETE if - ParseResult.commands.exists(command => command._1.startsWith(input)) => - parsedLine(input, cursor) + case ParseContext.COMPLETE + if ParseResult.commands.exists(command => command._1.startsWith(input)) => + parsedLine(input, cursor) case ParseContext.COMPLETE => // Parse to find completions (typically after a Tab). @@ -181,6 +179,6 @@ class JLineTerminal extends java.io.Closeable { case _ => incomplete() } - } + end parse } } diff --git a/compiler/src/dotty/tools/repl/ParseResult.scala b/compiler/src/dotty/tools/repl/ParseResult.scala index 2b7740152fa4..dce145a9416a 100644 --- a/compiler/src/dotty/tools/repl/ParseResult.scala +++ b/compiler/src/dotty/tools/repl/ParseResult.scala @@ -211,7 +211,8 @@ object ParseResult { maybeIncomplete(sourceCode, maybeIncomplete = false) private def maybeIncomplete(sourceCode: String, maybeIncomplete: Boolean)(using state: State): ParseResult = - apply(SourceFile.virtual(str.REPL_SESSION_LINE + (state.objectIndex + 1), sourceCode, maybeIncomplete = maybeIncomplete)) + apply: + SourceFile.virtual(str.REPL_SESSION_LINE + (state.objectIndex + 1), sourceCode, maybeIncomplete) /** Check if the input is incomplete. * @@ -219,20 +220,17 @@ object ParseResult { * having to evaluate the expression. 
*/ def isIncomplete(sourceCode: String)(using Context): Boolean = - sourceCode match { - case CommandExtract(_) | "" => false - case _ => { - val reporter = newStoreReporter - val source = SourceFile.virtual("", sourceCode, maybeIncomplete = true) - val unit = CompilationUnit(source, mustExist = false) - val localCtx = ctx.fresh - .setCompilationUnit(unit) - .setReporter(reporter) - var needsMore = false - reporter.withIncompleteHandler((_, _) => needsMore = true) { - parseStats(using localCtx) - } - !reporter.hasErrors && needsMore - } - } + sourceCode match + case CommandExtract(_) | "" => false + case _ => + val reporter = newStoreReporter + val source = SourceFile.virtual("", sourceCode, maybeIncomplete = true) + val unit = CompilationUnit(source, mustExist = false) + val localCtx = ctx.fresh + .setCompilationUnit(unit) + .setReporter(reporter) + var needsMore = false + reporter.withIncompleteHandler((_, _) => needsMore = true): + parseStats(using localCtx) + !reporter.hasErrors && needsMore } diff --git a/compiler/test/dotty/tools/repl/ReplCompilerTests.scala b/compiler/test/dotty/tools/repl/ReplCompilerTests.scala index d32b28647c32..2cef2b7f8be1 100644 --- a/compiler/test/dotty/tools/repl/ReplCompilerTests.scala +++ b/compiler/test/dotty/tools/repl/ReplCompilerTests.scala @@ -511,6 +511,13 @@ class ReplCompilerTests extends ReplTest: val all = lines() assertTrue(hints.forall(hint => all.exists(_.contains(hint)))) + @Test def `i22844 regression colon eol`: Unit = initially: + run: + """|println: + | "hello, world" + |""".stripMargin // outdent, but this test does not exercise the bug + assertEquals(List("hello, world"), lines()) + object ReplCompilerTests: private val pattern = Pattern.compile("\\r[\\n]?|\\n"); From 01f3fe02d824cdddc08e18c776f4217be5715b44 Mon Sep 17 00:00:00 2001 From: Som Snytt Date: Thu, 20 Mar 2025 17:46:05 -0700 Subject: [PATCH 2/3] Check trailing blank line at EOF for OUTDENT --- .../src/dotty/tools/dotc/parsing/Scanners.scala | 17 
+++++++++++++++-- compiler/src/dotty/tools/dotc/util/Chars.scala | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index 2f4f7a400861..3cbcb69609f7 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -601,6 +601,20 @@ object Scanners { lastWidth = r.knownWidth newlineIsSeparating = r.isInstanceOf[InBraces] + // no OUTDENT at EOF if the input ends in a non-empty blank (whitespace-only) line + inline def isTrailingBlankLine: Boolean = + token == EOF && { + val end = buf.length - 1 // take terminal NL as empty last line + val prev = buf.lastIndexWhere(!isWhitespace(_), end = end) + prev < 0 || end - prev > 0 && isLineBreakChar(buf(prev)) + } + + inline def canDedent: Boolean = + lastToken != INDENT + && !isLeadingInfixOperator(nextWidth) + && !statCtdTokens.contains(lastToken) + && !isTrailingBlankLine + if newlineIsSeparating && canEndStatTokens.contains(lastToken) && canStartStatTokens.contains(token) @@ -613,9 +627,8 @@ object Scanners { || nextWidth == lastWidth && (indentPrefix == MATCH || indentPrefix == CATCH) && token != CASE then if currentRegion.isOutermost then if nextWidth < lastWidth then currentRegion = topLevelRegion(nextWidth) - else if !isLeadingInfixOperator(nextWidth) && !statCtdTokens.contains(lastToken) && lastToken != INDENT then + else if canDedent then currentRegion match - case _ if token == EOF => // no OUTDENT at EOF case r: Indented => insert(OUTDENT, offset) handleNewIndentWidth(r.enclosing, ir => diff --git a/compiler/src/dotty/tools/dotc/util/Chars.scala b/compiler/src/dotty/tools/dotc/util/Chars.scala index 916bdfa9dca3..e68c48903a63 100644 --- a/compiler/src/dotty/tools/dotc/util/Chars.scala +++ b/compiler/src/dotty/tools/dotc/util/Chars.scala @@ -50,7 +50,7 @@ object Chars: } /** Is character a whitespace character (but not a new line)?
*/ - def isWhitespace(c: Char): Boolean = + inline def isWhitespace(c: Char): Boolean = c == ' ' || c == '\t' || c == CR /** Can character form part of a doc comment variable $xxx? */ From 70c7af2c64a3a40036628ee6584c1efd464247ed Mon Sep 17 00:00:00 2001 From: Som Snytt Date: Sat, 22 Mar 2025 00:25:59 -0700 Subject: [PATCH 3/3] Preserve EOF token when probing arrow EOL --- compiler/src/dotty/tools/dotc/parsing/Scanners.scala | 9 ++++++--- compiler/test/dotty/tools/repl/ReplCompilerTests.scala | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala index 3cbcb69609f7..310ed262ae18 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala @@ -682,13 +682,16 @@ object Scanners { reset() if atEOL then token = COLONeol - // consume => and insert if applicable + // consume => and insert if applicable. Used to detect colon arrow: x => def observeArrowIndented(): Unit = if isArrow && indentSyntax then peekAhead() - val atEOL = isAfterLineEnd || token == EOF + val atEOL = isAfterLineEnd + val atEOF = token == EOF reset() - if atEOL then + if atEOF then + token = EOF + else if atEOL then val nextWidth = indentWidth(next.offset) val lastWidth = currentRegion.indentWidth if lastWidth < nextWidth then diff --git a/compiler/test/dotty/tools/repl/ReplCompilerTests.scala b/compiler/test/dotty/tools/repl/ReplCompilerTests.scala index 2cef2b7f8be1..0592cbbed1be 100644 --- a/compiler/test/dotty/tools/repl/ReplCompilerTests.scala +++ b/compiler/test/dotty/tools/repl/ReplCompilerTests.scala @@ -518,6 +518,9 @@ class ReplCompilerTests extends ReplTest: |""".stripMargin // outdent, but this test does not exercise the bug assertEquals(List("hello, world"), lines()) + @Test def `i22844b regression colon arrow eol`: Unit = contextually: + assertTrue(ParseResult.isIncomplete("List(42).map: x =>")) + object 
ReplCompilerTests: private val pattern = Pattern.compile("\\r[\\n]?|\\n");