From 6d93f1f7da40b8b4d3bd8a8fc2c171556e713890 Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Mon, 27 Jan 2025 08:49:48 -0800 Subject: [PATCH 01/20] initial commit for lexer and AST --- Cargo.lock | 3 +- compiler/qsc_qasm3/Cargo.toml | 4 +- compiler/qsc_qasm3/src/keyword.rs | 60 + compiler/qsc_qasm3/src/lex.rs | 51 + compiler/qsc_qasm3/src/lex/cooked.rs | 535 ++++++ compiler/qsc_qasm3/src/lex/cooked/tests.rs | 1830 ++++++++++++++++++++ compiler/qsc_qasm3/src/lex/raw.rs | 447 +++++ compiler/qsc_qasm3/src/lex/raw/tests.rs | 1403 +++++++++++++++ compiler/qsc_qasm3/src/lib.rs | 3 + compiler/qsc_qasm3/src/oqasm_ast.rs | 429 +++++ 10 files changed, 4763 insertions(+), 2 deletions(-) create mode 100644 compiler/qsc_qasm3/src/keyword.rs create mode 100644 compiler/qsc_qasm3/src/lex.rs create mode 100644 compiler/qsc_qasm3/src/lex/cooked.rs create mode 100644 compiler/qsc_qasm3/src/lex/cooked/tests.rs create mode 100644 compiler/qsc_qasm3/src/lex/raw.rs create mode 100644 compiler/qsc_qasm3/src/lex/raw/tests.rs create mode 100644 compiler/qsc_qasm3/src/oqasm_ast.rs diff --git a/Cargo.lock b/Cargo.lock index c18c852995..12ccd4b3dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1474,16 +1474,17 @@ version = "0.0.0" dependencies = [ "bitflags 2.6.0", "difference", + "enum-iterator", "expect-test", "indoc", "miette", "num-bigint", - "oq3_lexer", "oq3_parser", "oq3_semantics", "oq3_source_file", "oq3_syntax", "qsc", + "qsc_data_structures", "qsc_qasm3", "rustc-hash", "thiserror", diff --git a/compiler/qsc_qasm3/Cargo.toml b/compiler/qsc_qasm3/Cargo.toml index 2c3115ed3a..ed7a04355c 100644 --- a/compiler/qsc_qasm3/Cargo.toml +++ b/compiler/qsc_qasm3/Cargo.toml @@ -9,15 +9,17 @@ version.workspace = true [dependencies] bitflags = { workspace = true } +enum-iterator = { workspace = true } num-bigint = { workspace = true } miette = { workspace = true } qsc = { path = "../qsc" } +qsc_data_structures = { path = "../qsc_data_structures" } rustc-hash = { workspace = true } thiserror = { workspace = true } oq3_source_file = { workspace = true } oq3_syntax = { workspace = true } oq3_parser = { workspace = true } -oq3_lexer = { workspace = true } +# oq3_lexer = { workspace = true } oq3_semantics = { workspace = true } [dev-dependencies] diff --git a/compiler/qsc_qasm3/src/keyword.rs b/compiler/qsc_qasm3/src/keyword.rs new file mode 100644 index 0000000000..d6f7783ad6 --- /dev/null +++ b/compiler/qsc_qasm3/src/keyword.rs @@ -0,0 +1,60 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use enum_iterator::Sequence; +use std::{ + fmt::{self, Display, Formatter}, + str::FromStr, +}; + +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Sequence)] +pub enum Keyword { + OpenQASM, + Include, + Defcalgrammar, + Def, + Cal, + Gate, + Extern, + Box, + Let, + + Break, + Continue, + If, + Else, + End, + Return, + For, + While, + In, + Switch, + Case, + Default, + + Pragma, + Annotation, +} + +impl Keyword { + pub(super) fn as_str(self) -> &'static str { + todo!() + } +} + +impl Display for Keyword { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +impl FromStr for Keyword { + type Err = (); + + // This is a hot function. Use a match expression so that the Rust compiler + // can optimize the string comparisons better, and order the cases by + // frequency in Q# so that fewer comparisons are needed on average. 
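    // A hedged sketch of the match-based lookup this comment describes (illustrative
    // only: this commit leaves both `as_str` and `from_str` as `todo!()`, and the
    // keyword spellings below are assumptions based on OpenQASM 3):
    //
    //     fn from_str(s: &str) -> Result<Self, ()> {
    //         match s {
    //             "if" => Ok(Self::If),
    //             "for" => Ok(Self::For),
    //             "gate" => Ok(Self::Gate),
    //             "include" => Ok(Self::Include),
    //             "OPENQASM" => Ok(Self::OpenQASM),
    //             // ...remaining keywords follow the same pattern...
    //             _ => Err(()),
    //         }
    //     }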
+ fn from_str(s: &str) -> Result { + todo!() + } +} diff --git a/compiler/qsc_qasm3/src/lex.rs b/compiler/qsc_qasm3/src/lex.rs new file mode 100644 index 0000000000..be3f8849f8 --- /dev/null +++ b/compiler/qsc_qasm3/src/lex.rs @@ -0,0 +1,51 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +pub mod cooked; +pub mod raw; + +use enum_iterator::Sequence; + +pub(super) use cooked::{ClosedBinOp, Error, Lexer, StringToken, Token, TokenKind}; + +/// A delimiter token. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] +pub enum Delim { + /// `{` or `}` + Brace, + /// `[` or `]` + Bracket, + /// `(` or `)` + Paren, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] +pub enum Radix { + Binary, + Octal, + Decimal, + Hexadecimal, +} + +impl From for u32 { + fn from(value: Radix) -> Self { + match value { + Radix::Binary => 2, + Radix::Octal => 8, + Radix::Decimal => 10, + Radix::Hexadecimal => 16, + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] +pub enum InterpolatedStart { + DollarQuote, + RBrace, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] +pub enum InterpolatedEnding { + Quote, + LBrace, +} diff --git a/compiler/qsc_qasm3/src/lex/cooked.rs b/compiler/qsc_qasm3/src/lex/cooked.rs new file mode 100644 index 0000000000..342dc0df46 --- /dev/null +++ b/compiler/qsc_qasm3/src/lex/cooked.rs @@ -0,0 +1,535 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! The second lexing phase "cooks" a raw token stream, transforming them into tokens that directly +//! correspond to components in the Q# grammar. Keywords are treated as identifiers, except `and` +//! and `or`, which are cooked into [`ClosedBinOp`] so that `and=` and `or=` are lexed correctly. +//! +//! Whitespace and comment tokens are discarded; this means that cooked tokens are not necessarily +//! contiguous, so they include both a starting and ending byte offset. +//! +//! Tokens never contain substrings from the original input, but are simply labels that refer back +//! to regions in the input. Lexing never fails, but may produce error tokens. 
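//!
//! A hedged usage sketch (illustrative only, not part of this commit): the cooked
//! [`Lexer`] is an iterator over `Result<Token, Error>`, so callers can collect it
//! directly:
//!
//!     let tokens: Vec<_> = Lexer::new("x + 1;").collect();
//!     // Whitespace is dropped; the remaining tokens are Ident, ClosedBinOp(Plus),
//!     // Int(Decimal), and Semi, each carrying a byte-offset Span.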
+ +#[cfg(test)] +mod tests; + +use super::{ + raw::{self, Number, Single}, + Delim, InterpolatedEnding, InterpolatedStart, Radix, +}; +use crate::keyword::Keyword; +use enum_iterator::Sequence; +use miette::Diagnostic; +use qsc_data_structures::span::Span; +use std::{ + fmt::{self, Display, Formatter}, + iter::Peekable, +}; +use thiserror::Error; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) struct Token { + pub(crate) kind: TokenKind, + pub(crate) span: Span, +} + +#[derive(Clone, Copy, Debug, Diagnostic, Eq, Error, PartialEq)] +pub enum Error { + #[error("expected {0} to complete {1}, found {2}")] + #[diagnostic(code("Qsc.Lex.Incomplete"))] + Incomplete(raw::TokenKind, TokenKind, raw::TokenKind, #[label] Span), + + #[error("expected {0} to complete {1}, found EOF")] + #[diagnostic(code("Qsc.Lex.IncompleteEof"))] + IncompleteEof(raw::TokenKind, TokenKind, #[label] Span), + + #[error("unterminated string literal")] + #[diagnostic(code("Qsc.Lex.UnterminatedString"))] + UnterminatedString(#[label] Span), + + #[error("unrecognized character `{0}`")] + #[diagnostic(code("Qsc.Lex.UnknownChar"))] + Unknown(char, #[label] Span), +} + +impl Error { + pub(crate) fn with_offset(self, offset: u32) -> Self { + match self { + Self::Incomplete(expected, token, actual, span) => { + Self::Incomplete(expected, token, actual, span + offset) + } + Self::IncompleteEof(expected, token, span) => { + Self::IncompleteEof(expected, token, span + offset) + } + Self::UnterminatedString(span) => Self::UnterminatedString(span + offset), + Self::Unknown(c, span) => Self::Unknown(c, span + offset), + } + } + + pub(crate) fn span(self) -> Span { + match self { + Error::Incomplete(_, _, _, s) + | Error::IncompleteEof(_, _, s) + | Error::UnterminatedString(s) + | Error::Unknown(_, s) => s, + } + } +} + +/// A token kind. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] +pub enum TokenKind { + /// `'T` + /// used for generic parameters -- an apostrophe followed by an ident. + AposIdent, + /// `@` + At, + /// `!` + Bang, + /// `|` + Bar, + /// A big integer literal. + BigInt(Radix), + /// A closed binary operator followed by an equals token. + BinOpEq(ClosedBinOp), + /// A closing delimiter. + Close(Delim), + /// A closed binary operator not followed by an equals token. + ClosedBinOp(ClosedBinOp), + /// `:` + Colon, + /// `::` + ColonColon, + /// `,` + Comma, + /// A doc comment. + DocComment, + /// `.` + Dot, + /// `..` + DotDot, + /// `...` + DotDotDot, + /// End of file. + Eof, + /// `=` + Eq, + /// `==` + EqEq, + /// `=>` + FatArrow, + /// A floating-point literal. + Float, + /// `>` + Gt, + /// `>=` + Gte, + /// An identifier. + Ident, + /// An integer literal. + Int(Radix), + /// A keyword. + Keyword(Keyword), + /// `<-` + LArrow, + /// `<` + Lt, + /// `<=` + Lte, + /// `!=` + Ne, + /// An opening delimiter. + Open(Delim), + /// `++` + PlusPlus, + /// `->` + RArrow, + /// `;` + Semi, + /// A string literal. 
+ String(StringToken), + /// `~~~` + TildeTildeTilde, + /// `w/` + WSlash, + /// `w/=` + WSlashEq, +} + +impl Display for TokenKind { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + TokenKind::AposIdent => f.write_str("apostrophe identifier"), + TokenKind::At => f.write_str("`@`"), + TokenKind::Bang => f.write_str("`!`"), + TokenKind::Bar => f.write_str("`|`"), + TokenKind::BigInt(_) => f.write_str("big integer"), + TokenKind::BinOpEq(op) => write!(f, "`{op}=`"), + TokenKind::Close(Delim::Brace) => f.write_str("`}`"), + TokenKind::Close(Delim::Bracket) => f.write_str("`]`"), + TokenKind::Close(Delim::Paren) => f.write_str("`)`"), + TokenKind::ClosedBinOp(op) => write!(f, "`{op}`"), + TokenKind::Colon => f.write_str("`:`"), + TokenKind::ColonColon => f.write_str("`::`"), + TokenKind::Comma => f.write_str("`,`"), + TokenKind::DocComment => f.write_str("doc comment"), + TokenKind::Dot => f.write_str("`.`"), + TokenKind::DotDot => f.write_str("`..`"), + TokenKind::DotDotDot => f.write_str("`...`"), + TokenKind::Eof => f.write_str("EOF"), + TokenKind::Eq => f.write_str("`=`"), + TokenKind::EqEq => f.write_str("`==`"), + TokenKind::FatArrow => f.write_str("`=>`"), + TokenKind::Float => f.write_str("float"), + TokenKind::Gt => f.write_str("`>`"), + TokenKind::Gte => f.write_str("`>=`"), + TokenKind::Ident => f.write_str("identifier"), + TokenKind::Int(_) => f.write_str("integer"), + TokenKind::Keyword(keyword) => write!(f, "keyword `{keyword}`"), + TokenKind::LArrow => f.write_str("`<-`"), + TokenKind::Lt => f.write_str("`<`"), + TokenKind::Lte => f.write_str("`<=`"), + TokenKind::Ne => f.write_str("`!=`"), + TokenKind::Open(Delim::Brace) => f.write_str("`{`"), + TokenKind::Open(Delim::Bracket) => f.write_str("`[`"), + TokenKind::Open(Delim::Paren) => f.write_str("`(`"), + TokenKind::PlusPlus => f.write_str("++"), + TokenKind::RArrow => f.write_str("`->`"), + TokenKind::Semi => f.write_str("`;`"), + TokenKind::String(_) => f.write_str("string"), + TokenKind::TildeTildeTilde => f.write_str("`~~~`"), + TokenKind::WSlash => f.write_str("`w/`"), + TokenKind::WSlashEq => f.write_str("`w/=`"), + } + } +} + +impl From for TokenKind { + fn from(value: Number) -> Self { + match value { + Number::BigInt(radix) => Self::BigInt(radix), + Number::Float => Self::Float, + Number::Int(radix) => Self::Int(radix), + } + } +} + +/// A binary operator that returns the same type as the type of its first operand; in other words, +/// the domain of the first operand is closed under this operation. These are candidates for +/// compound assignment operators, like `+=`. 
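///
/// For example, the cooked lexer turns `+=` into `BinOpEq(Plus)` and `and=` into
/// `BinOpEq(And)`, while a bare `+` or `and` remains a `ClosedBinOp`.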
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] +pub enum ClosedBinOp { + /// `&&&` + AmpAmpAmp, + /// `and` + And, + /// `|||` + BarBarBar, + /// `^` + Caret, + /// `^^^` + CaretCaretCaret, + /// `>>>` + GtGtGt, + /// `<<<` + LtLtLt, + /// `-` + Minus, + /// `or` + Or, + /// `%` + Percent, + /// `+` + Plus, + /// `/` + Slash, + /// `*` + Star, +} + +impl Display for ClosedBinOp { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.write_str(match self { + ClosedBinOp::AmpAmpAmp => "&&&", + ClosedBinOp::And => "and", + ClosedBinOp::BarBarBar => "|||", + ClosedBinOp::Caret => "^", + ClosedBinOp::CaretCaretCaret => "^^^", + ClosedBinOp::GtGtGt => ">>>", + ClosedBinOp::LtLtLt => "<<<", + ClosedBinOp::Minus => "-", + ClosedBinOp::Or => "or", + ClosedBinOp::Percent => "%", + ClosedBinOp::Plus => "+", + ClosedBinOp::Slash => "/", + ClosedBinOp::Star => "*", + }) + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] +pub enum StringToken { + Normal, + Interpolated(InterpolatedStart, InterpolatedEnding), +} + +pub(crate) struct Lexer<'a> { + input: &'a str, + len: u32, + + // This uses a `Peekable` iterator over the raw lexer, which allows for one token lookahead. + tokens: Peekable>, +} + +impl<'a> Lexer<'a> { + pub(crate) fn new(input: &'a str) -> Self { + Self { + input, + len: input + .len() + .try_into() + .expect("input length should fit into u32"), + tokens: raw::Lexer::new(input).peekable(), + } + } + + fn offset(&mut self) -> u32 { + self.tokens.peek().map_or_else(|| self.len, |t| t.offset) + } + + fn next_if_eq_single(&mut self, single: Single) -> bool { + self.next_if_eq(raw::TokenKind::Single(single)) + } + + fn next_if_eq(&mut self, tok: raw::TokenKind) -> bool { + self.tokens.next_if(|t| t.kind == tok).is_some() + } + + fn expect_single(&mut self, single: Single, complete: TokenKind) -> Result<(), Error> { + self.expect(raw::TokenKind::Single(single), complete) + } + + fn expect(&mut self, tok: raw::TokenKind, complete: TokenKind) -> Result<(), Error> { + if self.next_if_eq(tok) { + Ok(()) + } else if let Some(&raw::Token { kind, offset }) = self.tokens.peek() { + let mut tokens = self.tokens.clone(); + let hi = tokens.nth(1).map_or_else(|| self.len, |t| t.offset); + let span = Span { lo: offset, hi }; + Err(Error::Incomplete(tok, complete, kind, span)) + } else { + let lo = self.len; + let span = Span { lo, hi: lo }; + Err(Error::IncompleteEof(tok, complete, span)) + } + } + + fn cook(&mut self, token: &raw::Token) -> Result, Error> { + let kind = match token.kind { + raw::TokenKind::Comment(raw::CommentKind::Block | raw::CommentKind::Normal) + | raw::TokenKind::Whitespace => Ok(None), + raw::TokenKind::Comment(raw::CommentKind::Doc) => Ok(Some(TokenKind::DocComment)), + raw::TokenKind::Ident => { + let ident = &self.input[(token.offset as usize)..(self.offset() as usize)]; + Ok(Some(self.ident(ident))) + } + raw::TokenKind::Number(number) => Ok(Some(number.into())), + raw::TokenKind::Single(single) => self.single(single).map(Some), + raw::TokenKind::String(raw::StringToken::Normal { terminated: true }) => { + Ok(Some(TokenKind::String(StringToken::Normal))) + } + raw::TokenKind::String(raw::StringToken::Interpolated(start, Some(ending))) => Ok( + Some(TokenKind::String(StringToken::Interpolated(start, ending))), + ), + raw::TokenKind::String( + raw::StringToken::Normal { terminated: false } + | raw::StringToken::Interpolated(_, None), + ) => Err(Error::UnterminatedString(Span { + lo: token.offset, + hi: token.offset, + })), + raw::TokenKind::Unknown => { + 
let c = self.input[(token.offset as usize)..] + .chars() + .next() + .expect("token offset should be the start of a character"); + let span = Span { + lo: token.offset, + hi: self.offset(), + }; + Err(Error::Unknown(c, span)) + } + }?; + + Ok(kind.map(|kind| { + let span = Span { + lo: token.offset, + hi: self.offset(), + }; + Token { kind, span } + })) + } + + #[allow(clippy::too_many_lines)] + fn single(&mut self, single: Single) -> Result { + match single { + Single::Amp => { + let op = ClosedBinOp::AmpAmpAmp; + self.expect_single(Single::Amp, TokenKind::ClosedBinOp(op))?; + self.expect_single(Single::Amp, TokenKind::ClosedBinOp(op))?; + Ok(self.closed_bin_op(op)) + } + Single::Apos => { + self.expect(raw::TokenKind::Ident, TokenKind::AposIdent)?; + Ok(TokenKind::AposIdent) + } + Single::At => Ok(TokenKind::At), + Single::Bang => { + if self.next_if_eq_single(Single::Eq) { + Ok(TokenKind::Ne) + } else { + Ok(TokenKind::Bang) + } + } + Single::Bar => { + if self.next_if_eq_single(Single::Bar) { + let op = ClosedBinOp::BarBarBar; + self.expect_single(Single::Bar, TokenKind::ClosedBinOp(op))?; + Ok(self.closed_bin_op(op)) + } else { + Ok(TokenKind::Bar) + } + } + Single::Caret => { + if self.next_if_eq_single(Single::Caret) { + let op = ClosedBinOp::CaretCaretCaret; + self.expect_single(Single::Caret, TokenKind::ClosedBinOp(op))?; + Ok(self.closed_bin_op(op)) + } else { + Ok(self.closed_bin_op(ClosedBinOp::Caret)) + } + } + Single::Close(delim) => Ok(TokenKind::Close(delim)), + Single::Colon => { + if self.next_if_eq_single(Single::Colon) { + Ok(TokenKind::ColonColon) + } else { + Ok(TokenKind::Colon) + } + } + Single::Comma => Ok(TokenKind::Comma), + Single::Dot => { + if self.next_if_eq_single(Single::Dot) { + if self.next_if_eq_single(Single::Dot) { + Ok(TokenKind::DotDotDot) + } else { + Ok(TokenKind::DotDot) + } + } else { + Ok(TokenKind::Dot) + } + } + Single::Eq => { + if self.next_if_eq_single(Single::Eq) { + Ok(TokenKind::EqEq) + } else if self.next_if_eq_single(Single::Gt) { + Ok(TokenKind::FatArrow) + } else { + Ok(TokenKind::Eq) + } + } + Single::Gt => { + if self.next_if_eq_single(Single::Eq) { + Ok(TokenKind::Gte) + } else if self.next_if_eq_single(Single::Gt) { + let op = ClosedBinOp::GtGtGt; + self.expect_single(Single::Gt, TokenKind::ClosedBinOp(op))?; + Ok(self.closed_bin_op(op)) + } else { + Ok(TokenKind::Gt) + } + } + Single::Lt => { + if self.next_if_eq_single(Single::Eq) { + Ok(TokenKind::Lte) + } else if self.next_if_eq_single(Single::Minus) { + Ok(TokenKind::LArrow) + } else if self.next_if_eq_single(Single::Lt) { + let op = ClosedBinOp::LtLtLt; + self.expect_single(Single::Lt, TokenKind::ClosedBinOp(op))?; + Ok(self.closed_bin_op(op)) + } else { + Ok(TokenKind::Lt) + } + } + Single::Minus => { + if self.next_if_eq_single(Single::Gt) { + Ok(TokenKind::RArrow) + } else { + Ok(self.closed_bin_op(ClosedBinOp::Minus)) + } + } + Single::Open(delim) => Ok(TokenKind::Open(delim)), + Single::Percent => Ok(self.closed_bin_op(ClosedBinOp::Percent)), + Single::Plus => { + if self.next_if_eq_single(Single::Plus) { + Ok(TokenKind::PlusPlus) + } else { + Ok(self.closed_bin_op(ClosedBinOp::Plus)) + } + } + Single::Semi => Ok(TokenKind::Semi), + Single::Slash => Ok(self.closed_bin_op(ClosedBinOp::Slash)), + Single::Star => Ok(self.closed_bin_op(ClosedBinOp::Star)), + Single::Tilde => { + let complete = TokenKind::TildeTildeTilde; + self.expect_single(Single::Tilde, complete)?; + self.expect_single(Single::Tilde, complete)?; + Ok(complete) + } + } + } + + fn closed_bin_op(&mut self, 
op: ClosedBinOp) -> TokenKind { + if self.next_if_eq_single(Single::Eq) { + TokenKind::BinOpEq(op) + } else { + TokenKind::ClosedBinOp(op) + } + } + + fn ident(&mut self, ident: &str) -> TokenKind { + match ident { + "and" => self.closed_bin_op(ClosedBinOp::And), + "or" => self.closed_bin_op(ClosedBinOp::Or), + "w" if self.next_if_eq_single(Single::Slash) => { + if self.next_if_eq_single(Single::Eq) { + TokenKind::WSlashEq + } else { + TokenKind::WSlash + } + } + ident => ident + .parse() + .map(TokenKind::Keyword) + .unwrap_or(TokenKind::Ident), + } + } +} + +impl Iterator for Lexer<'_> { + type Item = Result; + + fn next(&mut self) -> Option { + while let Some(token) = self.tokens.next() { + match self.cook(&token) { + Ok(None) => {} + Ok(Some(token)) => return Some(Ok(token)), + Err(err) => return Some(Err(err)), + } + } + + None + } +} diff --git a/compiler/qsc_qasm3/src/lex/cooked/tests.rs b/compiler/qsc_qasm3/src/lex/cooked/tests.rs new file mode 100644 index 0000000000..3065d6fd48 --- /dev/null +++ b/compiler/qsc_qasm3/src/lex/cooked/tests.rs @@ -0,0 +1,1830 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use super::{Lexer, Token, TokenKind}; +use crate::lex::Delim; +use expect_test::{expect, Expect}; +use qsc_data_structures::span::Span; + +fn check(input: &str, expect: &Expect) { + let actual: Vec<_> = Lexer::new(input).collect(); + expect.assert_debug_eq(&actual); +} + +fn op_string(kind: TokenKind) -> Option { + match kind { + TokenKind::AposIdent => Some("'T".to_string()), + TokenKind::At => Some("@".to_string()), + TokenKind::Bang => Some("!".to_string()), + TokenKind::Bar => Some("|".to_string()), + TokenKind::BinOpEq(op) => Some(format!("{op}=")), + TokenKind::Close(Delim::Brace) => Some("}".to_string()), + TokenKind::Close(Delim::Bracket) => Some("]".to_string()), + TokenKind::Close(Delim::Paren) => Some(")".to_string()), + TokenKind::ClosedBinOp(op) => Some(op.to_string()), + TokenKind::Colon => Some(":".to_string()), + TokenKind::ColonColon => Some("::".to_string()), + TokenKind::Comma => Some(",".to_string()), + TokenKind::Dot => Some(".".to_string()), + TokenKind::DotDot => Some("..".to_string()), + TokenKind::DotDotDot => Some("...".to_string()), + TokenKind::Eq => Some("=".to_string()), + TokenKind::EqEq => Some("==".to_string()), + TokenKind::FatArrow => Some("=>".to_string()), + TokenKind::Gt => Some(">".to_string()), + TokenKind::Gte => Some(">=".to_string()), + TokenKind::LArrow => Some("<-".to_string()), + TokenKind::Lt => Some("<".to_string()), + TokenKind::Lte => Some("<=".to_string()), + TokenKind::Ne => Some("!=".to_string()), + TokenKind::Open(Delim::Brace) => Some("{".to_string()), + TokenKind::Open(Delim::Bracket) => Some("[".to_string()), + TokenKind::Open(Delim::Paren) => Some("(".to_string()), + TokenKind::PlusPlus => Some("++".to_string()), + TokenKind::RArrow => Some("->".to_string()), + TokenKind::Semi => Some(";".to_string()), + TokenKind::TildeTildeTilde => Some("~~~".to_string()), + TokenKind::WSlash => Some("w/".to_string()), + TokenKind::WSlashEq => Some("w/=".to_string()), + TokenKind::BigInt(_) + | TokenKind::DocComment + | TokenKind::Eof + | TokenKind::Float + | TokenKind::Ident + | TokenKind::Int(_) + | TokenKind::Keyword(_) + | TokenKind::String(_) => None, + } +} + +#[test] +fn basic_ops() { + for kind in enum_iterator::all() { + let Some(input) = op_string(kind) else { + continue; + }; + let actual: Vec<_> = Lexer::new(&input).collect(); + let len = input + .len() + .try_into() + .expect("input length 
should fit into u32"); + assert_eq!( + actual, + vec![Ok(Token { + kind, + span: Span { lo: 0, hi: len } + }),] + ); + } +} + +#[test] +fn empty() { + check( + "", + &expect![[r#" + [] + "#]], + ); +} + +#[test] +fn amp() { + check( + "&", + &expect![[r#" + [ + Err( + IncompleteEof( + Single( + Amp, + ), + ClosedBinOp( + AmpAmpAmp, + ), + Span { + lo: 1, + hi: 1, + }, + ), + ), + ] + "#]], + ); +} + +#[test] +fn amp_amp() { + check( + "&&", + &expect![[r#" + [ + Err( + IncompleteEof( + Single( + Amp, + ), + ClosedBinOp( + AmpAmpAmp, + ), + Span { + lo: 2, + hi: 2, + }, + ), + ), + ] + "#]], + ); +} + +#[test] +fn amp_plus() { + check( + "&+", + &expect![[r#" + [ + Err( + Incomplete( + Single( + Amp, + ), + ClosedBinOp( + AmpAmpAmp, + ), + Single( + Plus, + ), + Span { + lo: 1, + hi: 2, + }, + ), + ), + Ok( + Token { + kind: ClosedBinOp( + Plus, + ), + span: Span { + lo: 1, + hi: 2, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn amp_multibyte() { + check( + "&🦀", + &expect![[r#" + [ + Err( + Incomplete( + Single( + Amp, + ), + ClosedBinOp( + AmpAmpAmp, + ), + Unknown, + Span { + lo: 1, + hi: 5, + }, + ), + ), + Err( + Unknown( + '🦀', + Span { + lo: 1, + hi: 5, + }, + ), + ), + ] + "#]], + ); +} + +#[test] +fn amp_amp_amp_amp_amp_amp() { + check( + "&&&&&&", + &expect![[r#" + [ + Ok( + Token { + kind: ClosedBinOp( + AmpAmpAmp, + ), + span: Span { + lo: 0, + hi: 3, + }, + }, + ), + Ok( + Token { + kind: ClosedBinOp( + AmpAmpAmp, + ), + span: Span { + lo: 3, + hi: 6, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn caret_caret() { + check( + "^^", + &expect![[r#" + [ + Err( + IncompleteEof( + Single( + Caret, + ), + ClosedBinOp( + CaretCaretCaret, + ), + Span { + lo: 2, + hi: 2, + }, + ), + ), + ] + "#]], + ); +} + +#[test] +fn and_ws_eq() { + check( + "and =", + &expect![[r#" + [ + Ok( + Token { + kind: ClosedBinOp( + And, + ), + span: Span { + lo: 0, + hi: 3, + }, + }, + ), + Ok( + Token { + kind: Eq, + span: Span { + lo: 4, + hi: 5, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn w() { + check( + "w", + &expect![[r#" + [ + Ok( + Token { + kind: Ident, + span: Span { + lo: 0, + hi: 1, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn w_slash_eq_ident() { + check( + "w/=foo", + &expect![[r#" + [ + Ok( + Token { + kind: WSlashEq, + span: Span { + lo: 0, + hi: 3, + }, + }, + ), + Ok( + Token { + kind: Ident, + span: Span { + lo: 3, + hi: 6, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn int() { + check( + "123", + &expect![[r#" + [ + Ok( + Token { + kind: Int( + Decimal, + ), + span: Span { + lo: 0, + hi: 3, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn negative_int() { + check( + "-123", + &expect![[r#" + [ + Ok( + Token { + kind: ClosedBinOp( + Minus, + ), + span: Span { + lo: 0, + hi: 1, + }, + }, + ), + Ok( + Token { + kind: Int( + Decimal, + ), + span: Span { + lo: 1, + hi: 4, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn positive_int() { + check( + "+123", + &expect![[r#" + [ + Ok( + Token { + kind: ClosedBinOp( + Plus, + ), + span: Span { + lo: 0, + hi: 1, + }, + }, + ), + Ok( + Token { + kind: Int( + Decimal, + ), + span: Span { + lo: 1, + hi: 4, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn bigint() { + check( + "123L", + &expect![[r#" + [ + Ok( + Token { + kind: BigInt( + Decimal, + ), + span: Span { + lo: 0, + hi: 4, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn negative_bigint() { + check( + "-123L", + &expect![[r#" + [ + Ok( + Token { + kind: ClosedBinOp( + Minus, + ), + span: Span { + lo: 0, + hi: 1, + }, + }, + ), + Ok( + Token { + kind: BigInt( + 
Decimal, + ), + span: Span { + lo: 1, + hi: 5, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn positive_bigint() { + check( + "+123L", + &expect![[r#" + [ + Ok( + Token { + kind: ClosedBinOp( + Plus, + ), + span: Span { + lo: 0, + hi: 1, + }, + }, + ), + Ok( + Token { + kind: BigInt( + Decimal, + ), + span: Span { + lo: 1, + hi: 5, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn float() { + check( + "1.23", + &expect![[r#" + [ + Ok( + Token { + kind: Float, + span: Span { + lo: 0, + hi: 4, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn negative_float() { + check( + "-1.23", + &expect![[r#" + [ + Ok( + Token { + kind: ClosedBinOp( + Minus, + ), + span: Span { + lo: 0, + hi: 1, + }, + }, + ), + Ok( + Token { + kind: Float, + span: Span { + lo: 1, + hi: 5, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn positive_float() { + check( + "+1.23", + &expect![[r#" + [ + Ok( + Token { + kind: ClosedBinOp( + Plus, + ), + span: Span { + lo: 0, + hi: 1, + }, + }, + ), + Ok( + Token { + kind: Float, + span: Span { + lo: 1, + hi: 5, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn leading_point() { + check( + ".1", + &expect![[r#" + [ + Ok( + Token { + kind: Dot, + span: Span { + lo: 0, + hi: 1, + }, + }, + ), + Ok( + Token { + kind: Int( + Decimal, + ), + span: Span { + lo: 1, + hi: 2, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn trailing_point() { + check( + "1.", + &expect![[r#" + [ + Ok( + Token { + kind: Float, + span: Span { + lo: 0, + hi: 2, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn leading_zero_float() { + check( + "0.42", + &expect![[r#" + [ + Ok( + Token { + kind: Float, + span: Span { + lo: 0, + hi: 4, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn dot_dot_int() { + check( + "..1", + &expect![[r#" + [ + Ok( + Token { + kind: DotDot, + span: Span { + lo: 0, + hi: 2, + }, + }, + ), + Ok( + Token { + kind: Int( + Decimal, + ), + span: Span { + lo: 2, + hi: 3, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn dot_dot_dot_int() { + check( + "...1", + &expect![[r#" + [ + Ok( + Token { + kind: DotDotDot, + span: Span { + lo: 0, + hi: 3, + }, + }, + ), + Ok( + Token { + kind: Int( + Decimal, + ), + span: Span { + lo: 3, + hi: 4, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn int_dot_dot() { + check( + "1..", + &expect![[r#" + [ + Ok( + Token { + kind: Int( + Decimal, + ), + span: Span { + lo: 0, + hi: 1, + }, + }, + ), + Ok( + Token { + kind: DotDot, + span: Span { + lo: 1, + hi: 3, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn int_dot_dot_dot() { + check( + "1...", + &expect![[r#" + [ + Ok( + Token { + kind: Int( + Decimal, + ), + span: Span { + lo: 0, + hi: 1, + }, + }, + ), + Ok( + Token { + kind: DotDotDot, + span: Span { + lo: 1, + hi: 4, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn dot_dot_dot_int_dot_dot_dot() { + check( + "...1...", + &expect![[r#" + [ + Ok( + Token { + kind: DotDotDot, + span: Span { + lo: 0, + hi: 3, + }, + }, + ), + Ok( + Token { + kind: Int( + Decimal, + ), + span: Span { + lo: 3, + hi: 4, + }, + }, + ), + Ok( + Token { + kind: DotDotDot, + span: Span { + lo: 4, + hi: 7, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn two_points_with_leading() { + check( + ".1.2", + &expect![[r#" + [ + Ok( + Token { + kind: Dot, + span: Span { + lo: 0, + hi: 1, + }, + }, + ), + Ok( + Token { + kind: Float, + span: Span { + lo: 1, + hi: 4, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn leading_point_exp() { + check( + ".1e2", + &expect![[r#" + [ + Ok( + Token { + kind: Dot, + span: Span { + lo: 0, + hi: 1, + }, + }, + ), + Ok( + Token { + kind: 
Float, + span: Span { + lo: 1, + hi: 4, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn ident() { + check( + "foo", + &expect![[r#" + [ + Ok( + Token { + kind: Ident, + span: Span { + lo: 0, + hi: 3, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn string() { + check( + r#""string""#, + &expect![[r#" + [ + Ok( + Token { + kind: String( + Normal, + ), + span: Span { + lo: 0, + hi: 8, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn string_empty() { + check( + r#""""#, + &expect![[r#" + [ + Ok( + Token { + kind: String( + Normal, + ), + span: Span { + lo: 0, + hi: 2, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn string_missing_ending() { + check( + r#""Uh oh..."#, + &expect![[r#" + [ + Err( + UnterminatedString( + Span { + lo: 0, + hi: 0, + }, + ), + ), + ] + "#]], + ); +} + +#[test] +fn interpolated_string_missing_ending() { + check( + r#"$"string"#, + &expect![[r#" + [ + Err( + UnterminatedString( + Span { + lo: 0, + hi: 0, + }, + ), + ), + ] + "#]], + ); +} + +#[test] +fn interpolated_string() { + check( + r#"$"string""#, + &expect![[r#" + [ + Ok( + Token { + kind: String( + Interpolated( + DollarQuote, + Quote, + ), + ), + span: Span { + lo: 0, + hi: 9, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn interpolated_string_braced() { + check( + r#"$"{x}""#, + &expect![[r#" + [ + Ok( + Token { + kind: String( + Interpolated( + DollarQuote, + LBrace, + ), + ), + span: Span { + lo: 0, + hi: 3, + }, + }, + ), + Ok( + Token { + kind: Ident, + span: Span { + lo: 3, + hi: 4, + }, + }, + ), + Ok( + Token { + kind: String( + Interpolated( + RBrace, + Quote, + ), + ), + span: Span { + lo: 4, + hi: 6, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn interpolated_string_escape_brace() { + check( + r#"$"\{""#, + &expect![[r#" + [ + Ok( + Token { + kind: String( + Interpolated( + DollarQuote, + Quote, + ), + ), + span: Span { + lo: 0, + hi: 5, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn interpolated_string_unclosed_brace() { + check( + r#"$"{"#, + &expect![[r#" + [ + Ok( + Token { + kind: String( + Interpolated( + DollarQuote, + LBrace, + ), + ), + span: Span { + lo: 0, + hi: 3, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn interpolated_string_unclosed_brace_quote() { + check( + r#"$"{""#, + &expect![[r#" + [ + Ok( + Token { + kind: String( + Interpolated( + DollarQuote, + LBrace, + ), + ), + span: Span { + lo: 0, + hi: 3, + }, + }, + ), + Err( + UnterminatedString( + Span { + lo: 3, + hi: 3, + }, + ), + ), + ] + "#]], + ); +} + +#[test] +fn interpolated_string_unopened_brace() { + check( + r#"$"}"#, + &expect![[r#" + [ + Err( + UnterminatedString( + Span { + lo: 0, + hi: 0, + }, + ), + ), + ] + "#]], + ); +} + +#[test] +fn interpolated_string_unopened_brace_quote() { + check( + r#"$"}""#, + &expect![[r#" + [ + Ok( + Token { + kind: String( + Interpolated( + DollarQuote, + Quote, + ), + ), + span: Span { + lo: 0, + hi: 4, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn interpolated_string_braced_index() { + check( + r#"$"{xs[0]}""#, + &expect![[r#" + [ + Ok( + Token { + kind: String( + Interpolated( + DollarQuote, + LBrace, + ), + ), + span: Span { + lo: 0, + hi: 3, + }, + }, + ), + Ok( + Token { + kind: Ident, + span: Span { + lo: 3, + hi: 5, + }, + }, + ), + Ok( + Token { + kind: Open( + Bracket, + ), + span: Span { + lo: 5, + hi: 6, + }, + }, + ), + Ok( + Token { + kind: Int( + Decimal, + ), + span: Span { + lo: 6, + hi: 7, + }, + }, + ), + Ok( + Token { + kind: Close( + Bracket, + ), + span: Span { + lo: 7, + hi: 8, + }, + }, + ), + Ok( + Token { + kind: String( + Interpolated( 
+ RBrace, + Quote, + ), + ), + span: Span { + lo: 8, + hi: 10, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn interpolated_string_two_braced() { + check( + r#"$"{x} {y}""#, + &expect![[r#" + [ + Ok( + Token { + kind: String( + Interpolated( + DollarQuote, + LBrace, + ), + ), + span: Span { + lo: 0, + hi: 3, + }, + }, + ), + Ok( + Token { + kind: Ident, + span: Span { + lo: 3, + hi: 4, + }, + }, + ), + Ok( + Token { + kind: String( + Interpolated( + RBrace, + LBrace, + ), + ), + span: Span { + lo: 4, + hi: 7, + }, + }, + ), + Ok( + Token { + kind: Ident, + span: Span { + lo: 7, + hi: 8, + }, + }, + ), + Ok( + Token { + kind: String( + Interpolated( + RBrace, + Quote, + ), + ), + span: Span { + lo: 8, + hi: 10, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn interpolated_string_braced_normal_string() { + check( + r#"$"{"{}"}""#, + &expect![[r#" + [ + Ok( + Token { + kind: String( + Interpolated( + DollarQuote, + LBrace, + ), + ), + span: Span { + lo: 0, + hi: 3, + }, + }, + ), + Ok( + Token { + kind: String( + Normal, + ), + span: Span { + lo: 3, + hi: 7, + }, + }, + ), + Ok( + Token { + kind: String( + Interpolated( + RBrace, + Quote, + ), + ), + span: Span { + lo: 7, + hi: 9, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn nested_interpolated_string() { + check( + r#"$"{$"{x}"}""#, + &expect![[r#" + [ + Ok( + Token { + kind: String( + Interpolated( + DollarQuote, + LBrace, + ), + ), + span: Span { + lo: 0, + hi: 3, + }, + }, + ), + Ok( + Token { + kind: String( + Interpolated( + DollarQuote, + LBrace, + ), + ), + span: Span { + lo: 3, + hi: 6, + }, + }, + ), + Ok( + Token { + kind: Ident, + span: Span { + lo: 6, + hi: 7, + }, + }, + ), + Ok( + Token { + kind: String( + Interpolated( + RBrace, + Quote, + ), + ), + span: Span { + lo: 7, + hi: 9, + }, + }, + ), + Ok( + Token { + kind: String( + Interpolated( + RBrace, + Quote, + ), + ), + span: Span { + lo: 9, + hi: 11, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn nested_interpolated_string_with_exprs() { + check( + r#"$"foo {x + $"bar {y}"} baz""#, + &expect![[r#" + [ + Ok( + Token { + kind: String( + Interpolated( + DollarQuote, + LBrace, + ), + ), + span: Span { + lo: 0, + hi: 7, + }, + }, + ), + Ok( + Token { + kind: Ident, + span: Span { + lo: 7, + hi: 8, + }, + }, + ), + Ok( + Token { + kind: ClosedBinOp( + Plus, + ), + span: Span { + lo: 9, + hi: 10, + }, + }, + ), + Ok( + Token { + kind: String( + Interpolated( + DollarQuote, + LBrace, + ), + ), + span: Span { + lo: 11, + hi: 18, + }, + }, + ), + Ok( + Token { + kind: Ident, + span: Span { + lo: 18, + hi: 19, + }, + }, + ), + Ok( + Token { + kind: String( + Interpolated( + RBrace, + Quote, + ), + ), + span: Span { + lo: 19, + hi: 21, + }, + }, + ), + Ok( + Token { + kind: String( + Interpolated( + RBrace, + Quote, + ), + ), + span: Span { + lo: 21, + hi: 27, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn unknown() { + check( + "##", + &expect![[r#" + [ + Err( + Unknown( + '#', + Span { + lo: 0, + hi: 1, + }, + ), + ), + Err( + Unknown( + '#', + Span { + lo: 1, + hi: 2, + }, + ), + ), + ] + "#]], + ); +} + +#[test] +fn comment() { + check( + "//comment\nx", + &expect![[r#" + [ + Ok( + Token { + kind: Ident, + span: Span { + lo: 10, + hi: 11, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn doc_comment() { + check( + "///comment\nx", + &expect![[r#" + [ + Ok( + Token { + kind: DocComment, + span: Span { + lo: 0, + hi: 10, + }, + }, + ), + Ok( + Token { + kind: Ident, + span: Span { + lo: 11, + hi: 12, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn 
comment_four_slashes() { + check( + "////comment\nx", + &expect![[r#" + [ + Ok( + Token { + kind: Ident, + span: Span { + lo: 12, + hi: 13, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn unfinished_generic() { + check( + "' T", + &expect![[r#" + [ + Err( + Incomplete( + Ident, + AposIdent, + Whitespace, + Span { + lo: 1, + hi: 3, + }, + ), + ), + Ok( + Token { + kind: Ident, + span: Span { + lo: 3, + hi: 4, + }, + }, + ), + ] + "#]], + ); +} +#[test] +fn unfinished_generic_2() { + check( + "'// test + T", + &expect![[r#" + [ + Err( + Incomplete( + Ident, + AposIdent, + Comment( + Normal, + ), + Span { + lo: 1, + hi: 8, + }, + ), + ), + Ok( + Token { + kind: Ident, + span: Span { + lo: 18, + hi: 19, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn unfinished_generic_3() { + check( + "' T", + &expect![[r#" + [ + Err( + Incomplete( + Ident, + AposIdent, + Whitespace, + Span { + lo: 1, + hi: 5, + }, + ), + ), + Ok( + Token { + kind: Ident, + span: Span { + lo: 5, + hi: 6, + }, + }, + ), + ] + "#]], + ); +} +#[test] +fn correct_generic() { + check( + "'T", + &expect![[r#" + [ + Ok( + Token { + kind: AposIdent, + span: Span { + lo: 0, + hi: 2, + }, + }, + ), + ] + "#]], + ); +} +#[test] +fn generic_missing_ident() { + check( + "'", + &expect![[r#" + [ + Err( + IncompleteEof( + Ident, + AposIdent, + Span { + lo: 1, + hi: 1, + }, + ), + ), + ] + "#]], + ); +} + +#[test] +fn generic_underscore_name() { + check( + "'_", + &expect![[r#" + [ + Ok( + Token { + kind: AposIdent, + span: Span { + lo: 0, + hi: 2, + }, + }, + ), + ] + "#]], + ); +} diff --git a/compiler/qsc_qasm3/src/lex/raw.rs b/compiler/qsc_qasm3/src/lex/raw.rs new file mode 100644 index 0000000000..6a6b5e2abe --- /dev/null +++ b/compiler/qsc_qasm3/src/lex/raw.rs @@ -0,0 +1,447 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! The first lexing phase transforms an input string into literals, single-character operators, +//! whitespace, and comments. Keywords are treated as identifiers. The raw token stream is +//! contiguous: there are no gaps between tokens. +//! +//! These are "raw" tokens because single-character operators don't always correspond to Q# +//! operators, and whitespace and comments will later be discarded. Raw tokens are the ingredients +//! that are "cooked" into compound tokens before they can be consumed by the parser. +//! +//! Tokens never contain substrings from the original input, but are simply labels that refer back +//! to offsets in the input. Lexing never fails, but may produce unknown tokens. + +#[cfg(test)] +mod tests; + +use super::{Delim, InterpolatedEnding, InterpolatedStart, Radix}; +use enum_iterator::Sequence; +use std::{ + fmt::{self, Display, Formatter, Write}, + iter::Peekable, + str::CharIndices, +}; + +/// A raw token. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Token { + /// The token kind. + pub kind: TokenKind, + /// The byte offset of the token starting character. 
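    /// Raw tokens carry only this start offset; a token's extent is implied by the
    /// next token's offset (or the end of input), since the raw stream is contiguous.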
+ pub offset: u32, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] +pub enum TokenKind { + Comment(CommentKind), + Ident, + Number(Number), + Single(Single), + String(StringToken), + Unknown, + Whitespace, +} + +impl Display for TokenKind { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + TokenKind::Comment(CommentKind::Block) => f.write_str("block comment"), + TokenKind::Comment(CommentKind::Normal) => f.write_str("comment"), + TokenKind::Comment(CommentKind::Doc) => f.write_str("doc comment"), + TokenKind::Ident => f.write_str("identifier"), + TokenKind::Number(Number::BigInt(_)) => f.write_str("big integer"), + TokenKind::Number(Number::Float) => f.write_str("float"), + TokenKind::Number(Number::Int(_)) => f.write_str("integer"), + TokenKind::Single(single) => write!(f, "`{single}`"), + TokenKind::String(_) => f.write_str("string"), + TokenKind::Unknown => f.write_str("unknown"), + TokenKind::Whitespace => f.write_str("whitespace"), + } + } +} + +/// A single-character operator token. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] +pub enum Single { + /// `&` + Amp, + /// `'` + Apos, + /// `@` + At, + /// `!` + Bang, + /// `|` + Bar, + /// `^` + Caret, + /// A closing delimiter. + Close(Delim), + /// `:` + Colon, + /// `,` + Comma, + /// `.` + Dot, + /// `=` + Eq, + /// `>` + Gt, + /// `<` + Lt, + /// `-` + Minus, + /// An opening delimiter. + Open(Delim), + /// `%` + Percent, + /// `+` + Plus, + /// `;` + Semi, + /// `/` + Slash, + /// `*` + Star, + /// `~` + Tilde, +} + +impl Display for Single { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.write_char(match self { + Single::Amp => '&', + Single::Apos => '\'', + Single::At => '@', + Single::Bang => '!', + Single::Bar => '|', + Single::Caret => '^', + Single::Close(Delim::Brace) => '}', + Single::Close(Delim::Bracket) => ']', + Single::Close(Delim::Paren) => ')', + Single::Colon => ':', + Single::Comma => ',', + Single::Dot => '.', + Single::Eq => '=', + Single::Gt => '>', + Single::Lt => '<', + Single::Minus => '-', + Single::Open(Delim::Brace) => '{', + Single::Open(Delim::Bracket) => '[', + Single::Open(Delim::Paren) => '(', + Single::Percent => '%', + Single::Plus => '+', + Single::Semi => ';', + Single::Slash => '/', + Single::Star => '*', + Single::Tilde => '~', + }) + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] +pub enum Number { + BigInt(Radix), + Float, + Int(Radix), +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] +pub enum StringToken { + Normal { terminated: bool }, + Interpolated(InterpolatedStart, Option), +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum StringKind { + Normal, + Interpolated, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] +pub enum CommentKind { + Block, + Normal, + Doc, +} + +#[derive(Clone)] +pub struct Lexer<'a> { + chars: Peekable>, + interpolation: u8, + starting_offset: u32, +} + +impl<'a> Lexer<'a> { + #[must_use] + pub fn new(input: &'a str) -> Self { + Self { + chars: input.char_indices().peekable(), + interpolation: 0, + starting_offset: 0, + } + } + + #[must_use] + pub fn new_with_starting_offset(input: &'a str, starting_offset: u32) -> Self { + Self { + chars: input.char_indices().peekable(), + interpolation: 0, + starting_offset, + } + } + + fn next_if_eq(&mut self, c: char) -> bool { + self.chars.next_if(|i| i.1 == c).is_some() + } + + fn eat_while(&mut self, mut f: impl FnMut(char) -> bool) { + while self.chars.next_if(|i| f(i.1)).is_some() {} + } + + /// Returns the first character 
ahead of the cursor without consuming it. This operation is fast, + /// but if you know you want to consume the character if it matches, use [`next_if_eq`] instead. + fn first(&mut self) -> Option { + self.chars.peek().map(|i| i.1) + } + + /// Returns the second character ahead of the cursor without consuming it. This is slower + /// than [`first`] and should be avoided when possible. + fn second(&self) -> Option { + let mut chars = self.chars.clone(); + chars.next(); + chars.next().map(|i| i.1) + } + + fn whitespace(&mut self, c: char) -> bool { + if c.is_whitespace() { + self.eat_while(char::is_whitespace); + true + } else { + false + } + } + + fn comment(&mut self, c: char) -> Option { + if c == '/' && self.next_if_eq('/') { + let kind = if self.first() == Some('/') && self.second() != Some('/') { + self.chars.next(); + CommentKind::Doc + } else { + CommentKind::Normal + }; + + self.eat_while(|c| c != '\n' && c != '\r'); + Some(kind) + } else if c == '/' && self.next_if_eq('*') { + loop { + let (_, c) = self.chars.next()?; + if c == '*' && self.next_if_eq('/') { + return Some(CommentKind::Block); + } + } + } else { + None + } + } + + fn ident(&mut self, c: char) -> bool { + if c == '_' || c.is_alphabetic() { + self.eat_while(|c| c == '_' || c.is_alphanumeric()); + true + } else { + false + } + } + + fn number(&mut self, c: char) -> Option { + self.leading_zero(c).or_else(|| self.decimal(c)) + } + + fn leading_zero(&mut self, c: char) -> Option { + if c != '0' { + return None; + } + + let radix = if self.next_if_eq('b') || self.next_if_eq('B') { + Radix::Binary + } else if self.next_if_eq('o') || self.next_if_eq('O') { + Radix::Octal + } else if self.next_if_eq('x') || self.next_if_eq('X') { + Radix::Hexadecimal + } else { + Radix::Decimal + }; + + self.eat_while(|c| c == '_' || c.is_digit(radix.into())); + if self.next_if_eq('L') { + Some(Number::BigInt(radix)) + } else if radix == Radix::Decimal && self.float() { + Some(Number::Float) + } else { + Some(Number::Int(radix)) + } + } + + fn decimal(&mut self, c: char) -> Option { + if !c.is_ascii_digit() { + return None; + } + + self.eat_while(|c| c == '_' || c.is_ascii_digit()); + + if self.float() { + Some(Number::Float) + } else if self.next_if_eq('L') { + Some(Number::BigInt(Radix::Decimal)) + } else { + Some(Number::Int(Radix::Decimal)) + } + } + + fn float(&mut self) -> bool { + // Watch out for ranges: `0..` should be an integer followed by two dots. 
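        // For instance, "0.5" sees ('.', '5') here and is lexed as a Float, while
        // "0.." sees ('.', '.') and stays an Int, leaving the dots to be lexed as two
        // `Single::Dot` tokens (see the `int_dot_dot` test in the raw lexer tests).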
+ if self.first() == Some('.') && self.second() != Some('.') { + self.chars.next(); + self.eat_while(|c| c == '_' || c.is_ascii_digit()); + self.exp(); + true + } else { + self.exp() + } + } + + fn exp(&mut self) -> bool { + if self.next_if_eq('e') || self.next_if_eq('E') { + self.chars.next_if(|i| i.1 == '+' || i.1 == '-'); + self.eat_while(|c| c.is_ascii_digit()); + true + } else { + false + } + } + + fn string(&mut self, c: char) -> Option { + let kind = self.start_string(c)?; + + while self.first().is_some_and(|c| !is_string_terminator(kind, c)) { + self.eat_while(|c| c != '\\' && !is_string_terminator(kind, c)); + if self.next_if_eq('\\') { + self.chars.next(); + } + } + + Some(TokenKind::String(self.finish_string(c, kind))) + } + + fn start_string(&mut self, c: char) -> Option { + if c == '$' { + if self.next_if_eq('"') { + Some(StringKind::Interpolated) + } else { + None + } + } else if c == '"' { + Some(StringKind::Normal) + } else if self.interpolation > 0 && c == '}' { + self.interpolation = self + .interpolation + .checked_sub(1) + .expect("interpolation level should have been incremented at left brace"); + Some(StringKind::Interpolated) + } else { + None + } + } + + fn finish_string(&mut self, start: char, kind: StringKind) -> StringToken { + match kind { + StringKind::Normal => StringToken::Normal { + terminated: self.next_if_eq('"'), + }, + StringKind::Interpolated => { + let start = if start == '$' { + InterpolatedStart::DollarQuote + } else { + InterpolatedStart::RBrace + }; + + let end = if self.next_if_eq('{') { + self.interpolation = self + .interpolation + .checked_add(1) + .expect("interpolation should not exceed maximum depth"); + Some(InterpolatedEnding::LBrace) + } else if self.next_if_eq('"') { + Some(InterpolatedEnding::Quote) + } else { + None // Unterminated string. + }; + + StringToken::Interpolated(start, end) + } + } + } +} + +impl Iterator for Lexer<'_> { + type Item = Token; + + fn next(&mut self) -> Option { + let (offset, c) = self.chars.next()?; + let kind = if let Some(kind) = self.comment(c) { + TokenKind::Comment(kind) + } else if self.whitespace(c) { + TokenKind::Whitespace + } else if self.ident(c) { + TokenKind::Ident + } else { + self.number(c) + .map(TokenKind::Number) + .or_else(|| self.string(c)) + .or_else(|| single(c).map(TokenKind::Single)) + .unwrap_or(TokenKind::Unknown) + }; + let offset: u32 = offset.try_into().expect("offset should fit into u32"); + Some(Token { + kind, + offset: offset + self.starting_offset, + }) + } +} + +fn single(c: char) -> Option { + match c { + '-' => Some(Single::Minus), + ',' => Some(Single::Comma), + ';' => Some(Single::Semi), + ':' => Some(Single::Colon), + '!' => Some(Single::Bang), + '.' 
=> Some(Single::Dot), + '\'' => Some(Single::Apos), + '(' => Some(Single::Open(Delim::Paren)), + ')' => Some(Single::Close(Delim::Paren)), + '[' => Some(Single::Open(Delim::Bracket)), + ']' => Some(Single::Close(Delim::Bracket)), + '{' => Some(Single::Open(Delim::Brace)), + '}' => Some(Single::Close(Delim::Brace)), + '@' => Some(Single::At), + '*' => Some(Single::Star), + '/' => Some(Single::Slash), + '&' => Some(Single::Amp), + '%' => Some(Single::Percent), + '^' => Some(Single::Caret), + '+' => Some(Single::Plus), + '<' => Some(Single::Lt), + '=' => Some(Single::Eq), + '>' => Some(Single::Gt), + '|' => Some(Single::Bar), + '~' => Some(Single::Tilde), + _ => None, + } +} + +fn is_string_terminator(kind: StringKind, c: char) -> bool { + c == '"' || kind == StringKind::Interpolated && c == '{' +} diff --git a/compiler/qsc_qasm3/src/lex/raw/tests.rs b/compiler/qsc_qasm3/src/lex/raw/tests.rs new file mode 100644 index 0000000000..1a8e014ee9 --- /dev/null +++ b/compiler/qsc_qasm3/src/lex/raw/tests.rs @@ -0,0 +1,1403 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use super::Lexer; +use crate::lex::raw::{Single, Token, TokenKind}; +use expect_test::{expect, Expect}; + +fn check(input: &str, expect: &Expect) { + let actual: Vec<_> = Lexer::new(input).collect(); + expect.assert_debug_eq(&actual); +} + +#[test] +fn singles() { + for single in enum_iterator::all::() { + let actual: Vec<_> = Lexer::new(&single.to_string()).collect(); + let kind = TokenKind::Single(single); + assert_eq!(actual, vec![Token { kind, offset: 0 }]); + } +} + +#[test] +fn braces() { + check( + "{}", + &expect![[r#" + [ + Token { + kind: Single( + Open( + Brace, + ), + ), + offset: 0, + }, + Token { + kind: Single( + Close( + Brace, + ), + ), + offset: 1, + }, + ] + "#]], + ); +} + +#[test] +fn negate() { + check( + "-x", + &expect![[r#" + [ + Token { + kind: Single( + Minus, + ), + offset: 0, + }, + Token { + kind: Ident, + offset: 1, + }, + ] + "#]], + ); +} + +#[test] +fn whitespace() { + check( + "- x", + &expect![[r#" + [ + Token { + kind: Single( + Minus, + ), + offset: 0, + }, + Token { + kind: Whitespace, + offset: 1, + }, + Token { + kind: Ident, + offset: 4, + }, + ] + "#]], + ); +} + +#[test] +fn comment() { + check( + "//comment\nx", + &expect![[r#" + [ + Token { + kind: Comment( + Normal, + ), + offset: 0, + }, + Token { + kind: Whitespace, + offset: 9, + }, + Token { + kind: Ident, + offset: 10, + }, + ] + "#]], + ); +} + +#[test] +fn doc_comment() { + check( + "///comment\nx", + &expect![[r#" + [ + Token { + kind: Comment( + Doc, + ), + offset: 0, + }, + Token { + kind: Whitespace, + offset: 10, + }, + Token { + kind: Ident, + offset: 11, + }, + ] + "#]], + ); +} + +#[test] +fn comment_four_slashes() { + check( + "////comment\nx", + &expect![[r#" + [ + Token { + kind: Comment( + Normal, + ), + offset: 0, + }, + Token { + kind: Whitespace, + offset: 11, + }, + Token { + kind: Ident, + offset: 12, + }, + ] + "#]], + ); +} + +#[test] +fn string() { + check( + r#""string""#, + &expect![[r#" + [ + Token { + kind: String( + Normal { + terminated: true, + }, + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn string_escape_quote() { + check( + r#""str\"ing""#, + &expect![[r#" + [ + Token { + kind: String( + Normal { + terminated: true, + }, + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn string_missing_ending() { + check( + r#""string"#, + &expect![[r#" + [ + Token { + kind: String( + Normal { + terminated: false, + }, + ), + offset: 0, + }, + ] + "#]], + ); +} + 
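// A hedged illustration of the contiguity described in the raw module docs
// (a hypothetical test, not part of this commit; it assumes `Number` and `Radix`
// can be imported as shown):
#[test]
fn tokens_are_contiguous() {
    use crate::lex::{raw::Number, Radix};
    let actual: Vec<_> = Lexer::new("x 1").collect();
    assert_eq!(
        actual,
        vec![
            Token { kind: TokenKind::Ident, offset: 0 },
            Token { kind: TokenKind::Whitespace, offset: 1 },
            Token { kind: TokenKind::Number(Number::Int(Radix::Decimal)), offset: 2 },
        ]
    );
}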
+#[test] +fn interpolated_string_missing_ending() { + check( + r#"$"string"#, + &expect![[r#" + [ + Token { + kind: String( + Interpolated( + DollarQuote, + None, + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn interpolated_string() { + check( + r#"$"string""#, + &expect![[r#" + [ + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + Quote, + ), + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn interpolated_string_braced() { + check( + r#"$"{x}""#, + &expect![[r#" + [ + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + LBrace, + ), + ), + ), + offset: 0, + }, + Token { + kind: Ident, + offset: 3, + }, + Token { + kind: String( + Interpolated( + RBrace, + Some( + Quote, + ), + ), + ), + offset: 4, + }, + ] + "#]], + ); +} + +#[test] +fn interpolated_string_escape_brace() { + check( + r#"$"\{""#, + &expect![[r#" + [ + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + Quote, + ), + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn interpolated_string_unclosed_brace() { + check( + r#"$"{"#, + &expect![[r#" + [ + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + LBrace, + ), + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn interpolated_string_unclosed_brace_quote() { + check( + r#"$"{""#, + &expect![[r#" + [ + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + LBrace, + ), + ), + ), + offset: 0, + }, + Token { + kind: String( + Normal { + terminated: false, + }, + ), + offset: 3, + }, + ] + "#]], + ); +} + +#[test] +fn interpolated_string_unopened_brace() { + check( + r#"$"}"#, + &expect![[r#" + [ + Token { + kind: String( + Interpolated( + DollarQuote, + None, + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn interpolated_string_unopened_brace_quote() { + check( + r#"$"}""#, + &expect![[r#" + [ + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + Quote, + ), + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn interpolated_string_braced_index() { + check( + r#"$"{xs[0]}""#, + &expect![[r#" + [ + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + LBrace, + ), + ), + ), + offset: 0, + }, + Token { + kind: Ident, + offset: 3, + }, + Token { + kind: Single( + Open( + Bracket, + ), + ), + offset: 5, + }, + Token { + kind: Number( + Int( + Decimal, + ), + ), + offset: 6, + }, + Token { + kind: Single( + Close( + Bracket, + ), + ), + offset: 7, + }, + Token { + kind: String( + Interpolated( + RBrace, + Some( + Quote, + ), + ), + ), + offset: 8, + }, + ] + "#]], + ); +} + +#[test] +fn interpolated_string_two_braced() { + check( + r#"$"{x} {y}""#, + &expect![[r#" + [ + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + LBrace, + ), + ), + ), + offset: 0, + }, + Token { + kind: Ident, + offset: 3, + }, + Token { + kind: String( + Interpolated( + RBrace, + Some( + LBrace, + ), + ), + ), + offset: 4, + }, + Token { + kind: Ident, + offset: 7, + }, + Token { + kind: String( + Interpolated( + RBrace, + Some( + Quote, + ), + ), + ), + offset: 8, + }, + ] + "#]], + ); +} + +#[test] +fn interpolated_string_braced_normal_string() { + check( + r#"$"{"{}"}""#, + &expect![[r#" + [ + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + LBrace, + ), + ), + ), + offset: 0, + }, + Token { + kind: String( + Normal { + terminated: true, + }, + ), + offset: 3, + }, + Token { + kind: String( + Interpolated( + RBrace, + Some( + Quote, + ), + ), + ), + offset: 7, + }, + ] + "#]], + ); +} + +#[test] +fn 
nested_interpolated_string() { + check( + r#"$"{$"{x}"}""#, + &expect![[r#" + [ + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + LBrace, + ), + ), + ), + offset: 0, + }, + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + LBrace, + ), + ), + ), + offset: 3, + }, + Token { + kind: Ident, + offset: 6, + }, + Token { + kind: String( + Interpolated( + RBrace, + Some( + Quote, + ), + ), + ), + offset: 7, + }, + Token { + kind: String( + Interpolated( + RBrace, + Some( + Quote, + ), + ), + ), + offset: 9, + }, + ] + "#]], + ); +} + +#[test] +fn nested_interpolated_string_with_exprs() { + check( + r#"$"foo {x + $"bar {y}"} baz""#, + &expect![[r#" + [ + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + LBrace, + ), + ), + ), + offset: 0, + }, + Token { + kind: Ident, + offset: 7, + }, + Token { + kind: Whitespace, + offset: 8, + }, + Token { + kind: Single( + Plus, + ), + offset: 9, + }, + Token { + kind: Whitespace, + offset: 10, + }, + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + LBrace, + ), + ), + ), + offset: 11, + }, + Token { + kind: Ident, + offset: 18, + }, + Token { + kind: String( + Interpolated( + RBrace, + Some( + Quote, + ), + ), + ), + offset: 19, + }, + Token { + kind: String( + Interpolated( + RBrace, + Some( + Quote, + ), + ), + ), + offset: 21, + }, + ] + "#]], + ); +} + +#[test] +fn nested_interpolated_string_followed_by_braces() { + check( + r#"$"{$"{x}"}" {y}"#, + &expect![[r#" + [ + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + LBrace, + ), + ), + ), + offset: 0, + }, + Token { + kind: String( + Interpolated( + DollarQuote, + Some( + LBrace, + ), + ), + ), + offset: 3, + }, + Token { + kind: Ident, + offset: 6, + }, + Token { + kind: String( + Interpolated( + RBrace, + Some( + Quote, + ), + ), + ), + offset: 7, + }, + Token { + kind: String( + Interpolated( + RBrace, + Some( + Quote, + ), + ), + ), + offset: 9, + }, + Token { + kind: Whitespace, + offset: 11, + }, + Token { + kind: Single( + Open( + Brace, + ), + ), + offset: 12, + }, + Token { + kind: Ident, + offset: 13, + }, + Token { + kind: Single( + Close( + Brace, + ), + ), + offset: 14, + }, + ] + "#]], + ); +} + +#[test] +fn binary() { + check( + "0b10110", + &expect![[r#" + [ + Token { + kind: Number( + Int( + Binary, + ), + ), + offset: 0, + }, + ] + "#]], + ); + check( + "0B10110", + &expect![[r#" + [ + Token { + kind: Number( + Int( + Binary, + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn octal() { + check( + "0o70351", + &expect![[r#" + [ + Token { + kind: Number( + Int( + Octal, + ), + ), + offset: 0, + }, + ] + "#]], + ); + check( + "0O70351", + &expect![[r#" + [ + Token { + kind: Number( + Int( + Octal, + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn decimal() { + check( + "123", + &expect![[r#" + [ + Token { + kind: Number( + Int( + Decimal, + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn number_seps() { + check( + "123_456", + &expect![[r#" + [ + Token { + kind: Number( + Int( + Decimal, + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn number_underscore_prefix() { + check( + "_123_456", + &expect![[r#" + [ + Token { + kind: Ident, + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn int_dot_dot() { + check( + "0..", + &expect![[r#" + [ + Token { + kind: Number( + Int( + Decimal, + ), + ), + offset: 0, + }, + Token { + kind: Single( + Dot, + ), + offset: 1, + }, + Token { + kind: Single( + Dot, + ), + offset: 2, + }, + ] + "#]], + ); +} + +#[test] +fn 
dot_dot_int() { + check( + "..0", + &expect![[r#" + [ + Token { + kind: Single( + Dot, + ), + offset: 0, + }, + Token { + kind: Single( + Dot, + ), + offset: 1, + }, + Token { + kind: Number( + Int( + Decimal, + ), + ), + offset: 2, + }, + ] + "#]], + ); +} + +#[test] +fn dot_dot_dot_int() { + check( + "...0", + &expect![[r#" + [ + Token { + kind: Single( + Dot, + ), + offset: 0, + }, + Token { + kind: Single( + Dot, + ), + offset: 1, + }, + Token { + kind: Single( + Dot, + ), + offset: 2, + }, + Token { + kind: Number( + Int( + Decimal, + ), + ), + offset: 3, + }, + ] + "#]], + ); +} + +#[test] +fn hexadecimal() { + check( + "0x123abc", + &expect![[r#" + [ + Token { + kind: Number( + Int( + Hexadecimal, + ), + ), + offset: 0, + }, + ] + "#]], + ); + check( + "0X123abc", + &expect![[r#" + [ + Token { + kind: Number( + Int( + Hexadecimal, + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn bigint() { + check( + "123L", + &expect![[r#" + [ + Token { + kind: Number( + BigInt( + Decimal, + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn negative() { + check( + "-4", + &expect![[r#" + [ + Token { + kind: Single( + Minus, + ), + offset: 0, + }, + Token { + kind: Number( + Int( + Decimal, + ), + ), + offset: 1, + }, + ] + "#]], + ); +} + +#[test] +fn positive() { + check( + "+4", + &expect![[r#" + [ + Token { + kind: Single( + Plus, + ), + offset: 0, + }, + Token { + kind: Number( + Int( + Decimal, + ), + ), + offset: 1, + }, + ] + "#]], + ); +} + +#[test] +fn bigint_hexadecimal() { + check( + "0x123abcL", + &expect![[r#" + [ + Token { + kind: Number( + BigInt( + Hexadecimal, + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn float() { + check( + "1.23", + &expect![[r#" + [ + Token { + kind: Number( + Float, + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn leading_zero() { + check( + "0123", + &expect![[r#" + [ + Token { + kind: Number( + Int( + Decimal, + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn leading_point() { + check( + ".123", + &expect![[r#" + [ + Token { + kind: Single( + Dot, + ), + offset: 0, + }, + Token { + kind: Number( + Int( + Decimal, + ), + ), + offset: 1, + }, + ] + "#]], + ); +} + +#[test] +fn trailing_point() { + check( + "123.", + &expect![[r#" + [ + Token { + kind: Number( + Float, + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn exp() { + check( + "1e23", + &expect![[r#" + [ + Token { + kind: Number( + Float, + ), + offset: 0, + }, + ] + "#]], + ); + check( + "1E23", + &expect![[r#" + [ + Token { + kind: Number( + Float, + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn exp_plus() { + check( + "1e+23", + &expect![[r#" + [ + Token { + kind: Number( + Float, + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn exp_minus() { + check( + "1e-23", + &expect![[r#" + [ + Token { + kind: Number( + Float, + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn leading_point_exp() { + check( + ".25e2", + &expect![[r#" + [ + Token { + kind: Single( + Dot, + ), + offset: 0, + }, + Token { + kind: Number( + Float, + ), + offset: 1, + }, + ] + "#]], + ); +} + +#[test] +fn leading_zero_point() { + check( + "0.25", + &expect![[r#" + [ + Token { + kind: Number( + Float, + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn leading_zero_zero_point() { + check( + "00.25", + &expect![[r#" + [ + Token { + kind: Number( + Float, + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn leading_zero_exp() { + check( + "0.25e2", + &expect![[r#" + [ + Token { + kind: Number( + Float, + ), + offset: 0, + }, + 
] + "#]], + ); +} + +#[test] +fn unknown() { + check( + "##", + &expect![[r#" + [ + Token { + kind: Unknown, + offset: 0, + }, + Token { + kind: Unknown, + offset: 1, + }, + ] + "#]], + ); +} + +#[test] +fn float_hexadecimal() { + check( + "0x123.45", + &expect![[r#" + [ + Token { + kind: Number( + Int( + Hexadecimal, + ), + ), + offset: 0, + }, + Token { + kind: Single( + Dot, + ), + offset: 5, + }, + Token { + kind: Number( + Int( + Decimal, + ), + ), + offset: 6, + }, + ] + "#]], + ); +} diff --git a/compiler/qsc_qasm3/src/lib.rs b/compiler/qsc_qasm3/src/lib.rs index f4bf212ebb..7be343e15b 100644 --- a/compiler/qsc_qasm3/src/lib.rs +++ b/compiler/qsc_qasm3/src/lib.rs @@ -6,6 +6,9 @@ mod ast_builder; mod compile; pub use compile::qasm_to_program; pub mod io; +mod keyword; +mod lex; +mod oqasm_ast; mod oqasm_helpers; mod oqasm_types; pub mod parse; diff --git a/compiler/qsc_qasm3/src/oqasm_ast.rs b/compiler/qsc_qasm3/src/oqasm_ast.rs new file mode 100644 index 0000000000..34b955863e --- /dev/null +++ b/compiler/qsc_qasm3/src/oqasm_ast.rs @@ -0,0 +1,429 @@ +use qsc::Span; +use std::convert::Infallible; + +// TODO: profile this with iai-callgrind in a large OpenQASM3 +// sample to verify that is actually faster than using Vec. +/// An alternative to `Vec` that uses less stack space. +type List = Box<[Box]>; +type Identifier = String; + +enum Union { + First(T1), + Second(T2), + Third(T3), +} + +enum QASMNode { + Program(Program), + Annotation(Annotation), + Stmt(Stmt), + Expr(Expr), + DiscreteSet(DiscreteSet), + RangeDefinition(RangeDefinition), + IndexedIdentifier(IndexedIdentifier), + QuantumGateModifier(QuantumGateModifier), + QuantumMeasurement(QuantumMeasurement), + ClassicalArgument(ClassicalArgument), + ExternArgument(ExternArgument), + ClassicalType(ClassicalType), + QuantumArgument(QuantumArgument), + Pragma(Pragma), +} + +struct Program { + span: Span, + statements: List>, + version: Option, +} + +struct Annotation { + span: Span, + keyword: String, + command: Option, +} + +struct Stmt { + span: Span, + annotations: List, + kind: Box, +} + +struct Expr { + span: Span, + kind: Box, +} + +struct DiscreteSet { + span: Span, + values: List, +} + +struct RangeDefinition { + start: Option, + end: Option, + step: Option, +} + +struct IndexedIdentifier { + name: Identifier, + indices: List, +} + +struct QuantumGateModifier { + span: Span, + qubit: Union, +} + +struct QuantumMeasurement { + span: Span, + qubit: Union, +} + +struct ClassicalArgument { + span: Span, + r#type: ClassicalType, + name: Identifier, + access: Option, +} + +struct ExternArgument { + span: Span, + r#type: ClassicalType, + access: Option, +} + +enum ClassicalType { + Int(IntType), + UInt(UIntType), + Float(FloatType), + Complex(ComplexType), + Angle(AngleType), + Bit(BitType), + BoolType, + Array { + base_type: ArrayBaseType, + dimensions: List, + }, + ArrayReference { + base_type: ArrayBaseType, + dimensions: Union>, + }, + Duration, + Stretch, +} + +enum ArrayBaseType { + Int(IntType), + UInt(UIntType), + Float(FloatType), + Complex(ComplexType), + Angle(AngleType), + Bit(BitType), + BoolType, +} + +struct IntType { + size: Option, +} + +struct UIntType { + size: Option, +} + +struct FloatType { + size: Option, +} + +struct ComplexType { + base_size: Option, +} + +struct AngleType { + size: Option, +} + +struct BitType { + size: Option, +} + +enum AccessControl { + ReadOnly, + Mutable, +} + +struct QuantumArgument { + span: Span, + size: Option, +} + +struct Pragma { + span: Span, + command: String, +} + +enum 
StmtKind { + CompoundStmt(CompoundStmt), + Include(String), + ExpressionStatement(Expr), + QubitDeclaration(Identifier, Option), + QuantumGateDefinition(QuantumGateDefinition), + ExternDeclaration(ExternDeclaration), + Quantum(QuantumStmt), + Measurement(QuantumMeasurementStmt), + ClassicalDeclaration(ClassicalDeclaration), + IODeclaration(IODeclaration), + ConstantDeclaration(ConstantDeclaration), + CalibrationGrammarDeclaration { name: String }, + CalibrationStatement { body: String }, + CalibrationDefinition(CalibrationDefinition), + SubroutineDefinition(SubroutineDefinition), + Return(Option>), + Break, + Continue, + Branching(BranchingStmt), + WhileLoop(WhileLoop), + ForInLoop(ForInLoop), + Switch(SwitchStmt), + ClassicalAssignment(ClassicalAssignment), +} + +type CompoundStmt = List; + +struct QuantumGateDefinition { + name: Identifier, + arguments: Vec, + qubits: Vec, + body: Vec, +} + +struct ExternDeclaration { + name: Identifier, + arguments: List, + return_type: Option, +} + +enum QuantumStmt { + Gate(QuantumGate), + Phase(QuantumPhase), + Barrier(List), + Reset(List>), + DelayInstruction(DelayInstruction), + Box(BoxStmt), +} + +struct QuantumGate { + modifiers: List, + name: Identifier, + args: List, + qubits: List>, + duration: Option, +} + +struct QuantumPhase { + modifiers: List, + arg: Expr, + qubits: List>, +} + +struct DelayInstruction { + duration: Expr, + qubits: List>, +} + +struct BoxStmt { + duration: Option, + body: List, +} + +struct QuantumMeasurementStmt { + measure: QuantumMeasurement, + target: Option>>, +} + +struct ClassicalDeclaration { + r#type: ClassicalType, + identifier: Identifier, + init_expr: Option>, +} + +struct IODeclaration { + io_identifier: IOKeyword, + r#type: ClassicalType, + identifier: Identifier, +} + +struct ConstantDeclaration { + r#type: ClassicalType, + identifier: Identifier, + init_expr: Expr, +} + +struct CalibrationDefinition { + name: Identifier, + args: List>, + qubits: List, + return_type: Option, + body: String, +} + +struct SubroutineDefinition { + name: Identifier, + args: List>, + body: List, + return_type: Option, +} + +struct BranchingStmt { + condition: Expr, + if_block: List, + else_block: List, +} + +struct WhileLoop { + while_condition: Expr, + block: List, +} + +struct ForInLoop { + r#type: ClassicalType, + identifier: Identifier, + set_declaration: Union, + block: List, +} + +struct SwitchStmt { + target: Expr, + cases: List<(List, CompoundStmt)>, + /// Note that `None` is quite different to `[]` in this case; the latter is + /// an explicitly empty body, whereas the absence of a default might mean + /// that the switch is inexhaustive, and a linter might want to complain. 
+ default: Option, +} + +struct ClassicalAssignment { + lvalue: Union, + op: AssignmentOp, +} + +enum ExprKind { + Identifier(Identifier), + UnaryExpr(UnaryOp, Expr), + BinaryExpr(BinaryOp, Expr, Expr), + Literal(Literal), + FunctionCall { + name: Identifier, + args: List, + }, + Cast { + r#type: ClassicalType, + arg: Expr, + }, + Concatenation(Concatenation), + IndexExpr { + collection: Expr, + index: IndexElement, + }, + DurationOf { + target: List, + }, + SizeOf { + target: Expr, + value: Union, + }, +} + +enum UnaryOp { + NegB, + NegL, + NegN, +} + +enum BinaryOp { + /// `>` + Gt, + /// `<` + Lt, + /// `>=` + Gte, + /// `<=` + Lte, + /// `==` + Eq, + /// `!=` + Neq, + /// `&&` + AndL, + /// `||` + OrL, + /// `|` + OrB, + /// `^` + XorB, + /// `&` + AndB, + /// `<<` + ShL, + /// `>>` + ShR, + /// `+` + Add, + /// `-` + Sub, + /// `*` + Mul, + /// `/` + Div, + /// `%` + Mod, + /// `**` + Exp, +} + +enum Literal { + Integer(i64), + Float(f64), + Imaginary(f64), + Boolean(bool), + Bitstring { value: usize, width: u32 }, + Duration { value: f64, unit: TimeUnit }, + Array(List), +} + +struct Concatenation { + lhs: Expr, + rhs: Expr, +} + +type IndexElement = Union>>; + +enum AssignmentOp { + BinaryOp(BinaryOp), + /// OpenQASM3 has the `~=` assignment operator. + /// This enum variant is meant to capture that. + UnaryOp(UnaryOp), + Assign, +} + +enum GateModifierName { + Inv, + Pow, + Ctrl, + NegCtrl, +} + +enum IOKeyword { + Input, + Output, +} + +enum TimeUnit { + Dt, + /// Nanoseconds. + Ns, + /// Microseconds. + Us, + /// Milliseconds. + Ms, + /// Seconds. + S, +} From 25a4013c2ea189b00f74021971387e92416d5fde Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Mon, 27 Jan 2025 09:57:04 -0800 Subject: [PATCH 02/20] add a `Span` to all AST items --- compiler/qsc_qasm3/src/oqasm_ast.rs | 113 +++++++++++++++++++++++----- 1 file changed, 93 insertions(+), 20 deletions(-) diff --git a/compiler/qsc_qasm3/src/oqasm_ast.rs b/compiler/qsc_qasm3/src/oqasm_ast.rs index 34b955863e..835578a670 100644 --- a/compiler/qsc_qasm3/src/oqasm_ast.rs +++ b/compiler/qsc_qasm3/src/oqasm_ast.rs @@ -5,7 +5,6 @@ use std::convert::Infallible; // sample to verify that is actually faster than using Vec. /// An alternative to `Vec` that uses less stack space. 
type List = Box<[Box]>; -type Identifier = String; enum Union { First(T1), @@ -59,12 +58,19 @@ struct DiscreteSet { } struct RangeDefinition { + span: Span, start: Option, end: Option, step: Option, } +struct Identifier { + span: Span, + name: String, +} + struct IndexedIdentifier { + span: Span, name: Identifier, indices: List, } @@ -92,7 +98,12 @@ struct ExternArgument { access: Option, } -enum ClassicalType { +struct ClassicalType { + span: Span, + kind: ClassicalTypeKind, +} + +enum ClassicalTypeKind { Int(IntType), UInt(UIntType), Float(FloatType), @@ -100,19 +111,13 @@ enum ClassicalType { Angle(AngleType), Bit(BitType), BoolType, - Array { - base_type: ArrayBaseType, - dimensions: List, - }, - ArrayReference { - base_type: ArrayBaseType, - dimensions: Union>, - }, + Array(ArrayType), + ArrayReference(ArrayReferenceType), Duration, Stretch, } -enum ArrayBaseType { +enum ArrayBaseTypeKind { Int(IntType), UInt(UIntType), Float(FloatType), @@ -146,6 +151,18 @@ struct BitType { size: Option, } +struct ArrayType { + span: Span, + base_type: ArrayBaseTypeKind, + dimensions: List, +} + +struct ArrayReferenceType { + span: Span, + base_type: ArrayBaseTypeKind, + dimensions: Union>, +} + enum AccessControl { ReadOnly, Mutable, @@ -163,9 +180,9 @@ struct Pragma { enum StmtKind { CompoundStmt(CompoundStmt), - Include(String), - ExpressionStatement(Expr), - QubitDeclaration(Identifier, Option), + Include(IncludeStmt), + ExpressionStmt(Expr), + QubitDeclaration(QubitDeclaration), QuantumGateDefinition(QuantumGateDefinition), ExternDeclaration(ExternDeclaration), Quantum(QuantumStmt), @@ -173,11 +190,11 @@ enum StmtKind { ClassicalDeclaration(ClassicalDeclaration), IODeclaration(IODeclaration), ConstantDeclaration(ConstantDeclaration), - CalibrationGrammarDeclaration { name: String }, - CalibrationStatement { body: String }, + CalibrationGrammarDeclaration(CalibrationGrammarDeclaration), + CalibrationStmt(CalibrationStmt), CalibrationDefinition(CalibrationDefinition), SubroutineDefinition(SubroutineDefinition), - Return(Option>), + Return(ReturnStmt), Break, Continue, Branching(BranchingStmt), @@ -187,9 +204,24 @@ enum StmtKind { ClassicalAssignment(ClassicalAssignment), } -type CompoundStmt = List; +struct CompoundStmt { + span: Span, + statements: List, +} + +struct IncludeStmt { + span: Span, + filename: String, +} + +struct QubitDeclaration { + span: Span, + qubit: Identifier, + size: Option, +} struct QuantumGateDefinition { + span: Span, name: Identifier, arguments: Vec, qubits: Vec, @@ -197,12 +229,18 @@ struct QuantumGateDefinition { } struct ExternDeclaration { + span: Span, name: Identifier, arguments: List, return_type: Option, } -enum QuantumStmt { +struct QuantumStmt { + span: Span, + kind: QuantumStmtKind, +} + +enum QuantumStmtKind { Gate(QuantumGate), Phase(QuantumPhase), Barrier(List), @@ -212,6 +250,7 @@ enum QuantumStmt { } struct QuantumGate { + span: Span, modifiers: List, name: Identifier, args: List, @@ -220,45 +259,63 @@ struct QuantumGate { } struct QuantumPhase { + span: Span, modifiers: List, arg: Expr, qubits: List>, } struct DelayInstruction { + span: Span, duration: Expr, qubits: List>, } struct BoxStmt { + span: Span, duration: Option, body: List, } struct QuantumMeasurementStmt { + span: Span, measure: QuantumMeasurement, target: Option>>, } struct ClassicalDeclaration { + span: Span, r#type: ClassicalType, identifier: Identifier, init_expr: Option>, } struct IODeclaration { + span: Span, io_identifier: IOKeyword, r#type: ClassicalType, identifier: Identifier, } 
struct ConstantDeclaration { + span: Span, r#type: ClassicalType, identifier: Identifier, init_expr: Expr, } +struct CalibrationGrammarDeclaration { + span: Span, + name: String, +} + +struct CalibrationStmt { + span: Span, + body: String, +} + struct CalibrationDefinition { + span: Span, name: Identifier, args: List>, qubits: List, @@ -267,24 +324,33 @@ struct CalibrationDefinition { } struct SubroutineDefinition { + span: Span, name: Identifier, args: List>, body: List, return_type: Option, } +struct ReturnStmt { + span: Span, + expr: Option>, +} + struct BranchingStmt { + span: Span, condition: Expr, if_block: List, else_block: List, } struct WhileLoop { + span: Span, while_condition: Expr, block: List, } struct ForInLoop { + span: Span, r#type: ClassicalType, identifier: Identifier, set_declaration: Union, @@ -292,6 +358,7 @@ struct ForInLoop { } struct SwitchStmt { + span: Span, target: Expr, cases: List<(List, CompoundStmt)>, /// Note that `None` is quite different to `[]` in this case; the latter is @@ -301,6 +368,7 @@ struct SwitchStmt { } struct ClassicalAssignment { + span: Span, lvalue: Union, op: AssignmentOp, } @@ -379,7 +447,12 @@ enum BinaryOp { Exp, } -enum Literal { +struct Literal { + span: Span, + kind: LiteralKind, +} + +enum LiteralKind { Integer(i64), Float(f64), Imaginary(f64), From 3023459c595be88802d17e84fcc87354da787588 Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Mon, 27 Jan 2025 11:31:54 -0800 Subject: [PATCH 03/20] Wrap each `ExprKind` variant into its own struct. --- compiler/qsc_qasm3/src/oqasm_ast.rs | 69 ++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/compiler/qsc_qasm3/src/oqasm_ast.rs b/compiler/qsc_qasm3/src/oqasm_ast.rs index 835578a670..3790b56410 100644 --- a/compiler/qsc_qasm3/src/oqasm_ast.rs +++ b/compiler/qsc_qasm3/src/oqasm_ast.rs @@ -375,29 +375,56 @@ struct ClassicalAssignment { enum ExprKind { Identifier(Identifier), - UnaryExpr(UnaryOp, Expr), - BinaryExpr(BinaryOp, Expr, Expr), + UnaryExpr(UnaryExpr), + BinaryExpr(BinaryExpr), Literal(Literal), - FunctionCall { - name: Identifier, - args: List, - }, - Cast { - r#type: ClassicalType, - arg: Expr, - }, + FunctionCall(FunctionCall), + Cast(Cast), Concatenation(Concatenation), - IndexExpr { - collection: Expr, - index: IndexElement, - }, - DurationOf { - target: List, - }, - SizeOf { - target: Expr, - value: Union, - }, + IndexExpr(IndexExpr), + DurationOf(DurationOf), + SizeOf(SizeOf), +} + +struct UnaryExpr { + span: Span, + op: UnaryOp, + expr: Expr, +} + +struct BinaryExpr { + span: Span, + op: BinaryOp, + lhs: Expr, + rhs: Expr, +} +struct FunctionCall { + span: Span, + name: Identifier, + args: List, +} + +struct Cast { + span: Span, + r#type: ClassicalType, + arg: Expr, +} + +struct IndexExpr { + span: Span, + collection: Expr, + index: IndexElement, +} + +struct DurationOf { + span: Span, + target: List, +} + +struct SizeOf { + span: Span, + target: Expr, + value: Union, } enum UnaryOp { From 3e52491cf36978f52bdc6478b008f351c0d2586d Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Mon, 27 Jan 2025 23:45:31 -0800 Subject: [PATCH 04/20] lexer work in progress --- compiler/qsc_qasm3/src/lex/cooked.rs | 194 +++++---------------------- compiler/qsc_qasm3/src/lex/raw.rs | 10 +- 2 files changed, 38 insertions(+), 166 deletions(-) diff --git a/compiler/qsc_qasm3/src/lex/cooked.rs b/compiler/qsc_qasm3/src/lex/cooked.rs index 342dc0df46..b11d4eaea0 
100644 --- a/compiler/qsc_qasm3/src/lex/cooked.rs +++ b/compiler/qsc_qasm3/src/lex/cooked.rs @@ -80,128 +80,48 @@ impl Error { /// A token kind. #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] pub enum TokenKind { - /// `'T` - /// used for generic parameters -- an apostrophe followed by an ident. - AposIdent, - /// `@` - At, - /// `!` - Bang, - /// `|` - Bar, - /// A big integer literal. - BigInt(Radix), - /// A closed binary operator followed by an equals token. - BinOpEq(ClosedBinOp), - /// A closing delimiter. - Close(Delim), - /// A closed binary operator not followed by an equals token. - ClosedBinOp(ClosedBinOp), - /// `:` + Keyword, + Type, + + // Builtin identifiers and operations + GPhase, + Inv, + Pow, + Ctrl, + NegCtrl, + Dim, + DurationOf, + Delay, + Reset, + Measure, + Barrier, + + Literal, + + // Symbols + Open, + Close, + + // Punctuation Colon, - /// `::` - ColonColon, - /// `,` - Comma, - /// A doc comment. - DocComment, - /// `.` + Semicolon, Dot, - /// `..` - DotDot, - /// `...` - DotDotDot, - /// End of file. - Eof, - /// `=` - Eq, - /// `==` - EqEq, - /// `=>` - FatArrow, - /// A floating-point literal. - Float, - /// `>` - Gt, - /// `>=` - Gte, - /// An identifier. - Ident, - /// An integer literal. - Int(Radix), - /// A keyword. - Keyword(Keyword), - /// `<-` - LArrow, - /// `<` - Lt, - /// `<=` - Lte, - /// `!=` - Ne, - /// An opening delimiter. - Open(Delim), - /// `++` - PlusPlus, - /// `->` - RArrow, - /// `;` - Semi, - /// A string literal. - String(StringToken), - /// `~~~` - TildeTildeTilde, - /// `w/` - WSlash, - /// `w/=` - WSlashEq, + Comma, + + // Operators, + UnaryOperator, + BinaryOperator, + + Identifier, + HardwareQubit, + + Whitespace, + Comment, } impl Display for TokenKind { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match self { - TokenKind::AposIdent => f.write_str("apostrophe identifier"), - TokenKind::At => f.write_str("`@`"), - TokenKind::Bang => f.write_str("`!`"), - TokenKind::Bar => f.write_str("`|`"), - TokenKind::BigInt(_) => f.write_str("big integer"), - TokenKind::BinOpEq(op) => write!(f, "`{op}=`"), - TokenKind::Close(Delim::Brace) => f.write_str("`}`"), - TokenKind::Close(Delim::Bracket) => f.write_str("`]`"), - TokenKind::Close(Delim::Paren) => f.write_str("`)`"), - TokenKind::ClosedBinOp(op) => write!(f, "`{op}`"), - TokenKind::Colon => f.write_str("`:`"), - TokenKind::ColonColon => f.write_str("`::`"), - TokenKind::Comma => f.write_str("`,`"), - TokenKind::DocComment => f.write_str("doc comment"), - TokenKind::Dot => f.write_str("`.`"), - TokenKind::DotDot => f.write_str("`..`"), - TokenKind::DotDotDot => f.write_str("`...`"), - TokenKind::Eof => f.write_str("EOF"), - TokenKind::Eq => f.write_str("`=`"), - TokenKind::EqEq => f.write_str("`==`"), - TokenKind::FatArrow => f.write_str("`=>`"), - TokenKind::Float => f.write_str("float"), - TokenKind::Gt => f.write_str("`>`"), - TokenKind::Gte => f.write_str("`>=`"), - TokenKind::Ident => f.write_str("identifier"), - TokenKind::Int(_) => f.write_str("integer"), - TokenKind::Keyword(keyword) => write!(f, "keyword `{keyword}`"), - TokenKind::LArrow => f.write_str("`<-`"), - TokenKind::Lt => f.write_str("`<`"), - TokenKind::Lte => f.write_str("`<=`"), - TokenKind::Ne => f.write_str("`!=`"), - TokenKind::Open(Delim::Brace) => f.write_str("`{`"), - TokenKind::Open(Delim::Bracket) => f.write_str("`[`"), - TokenKind::Open(Delim::Paren) => f.write_str("`(`"), - TokenKind::PlusPlus => f.write_str("++"), - TokenKind::RArrow => f.write_str("`->`"), - TokenKind::Semi => 
f.write_str("`;`"), - TokenKind::String(_) => f.write_str("string"), - TokenKind::TildeTildeTilde => f.write_str("`~~~`"), - TokenKind::WSlash => f.write_str("`w/`"), - TokenKind::WSlashEq => f.write_str("`w/=`"), - } + todo!() } } @@ -219,47 +139,13 @@ impl From for TokenKind { /// the domain of the first operand is closed under this operation. These are candidates for /// compound assignment operators, like `+=`. #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub enum ClosedBinOp { - /// `&&&` - AmpAmpAmp, - /// `and` - And, - /// `|||` - BarBarBar, - /// `^` - Caret, - /// `^^^` - CaretCaretCaret, - /// `>>>` - GtGtGt, - /// `<<<` - LtLtLt, - /// `-` - Minus, - /// `or` - Or, - /// `%` - Percent, - /// `+` - Plus, - /// `/` - Slash, - /// `*` - Star, -} +pub enum ClosedBinOp {} impl Display for ClosedBinOp { fn fmt(&self, f: &mut Formatter) -> fmt::Result { f.write_str(match self { - ClosedBinOp::AmpAmpAmp => "&&&", - ClosedBinOp::And => "and", - ClosedBinOp::BarBarBar => "|||", ClosedBinOp::Caret => "^", - ClosedBinOp::CaretCaretCaret => "^^^", - ClosedBinOp::GtGtGt => ">>>", - ClosedBinOp::LtLtLt => "<<<", ClosedBinOp::Minus => "-", - ClosedBinOp::Or => "or", ClosedBinOp::Percent => "%", ClosedBinOp::Plus => "+", ClosedBinOp::Slash => "/", @@ -268,12 +154,6 @@ impl Display for ClosedBinOp { } } -#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub enum StringToken { - Normal, - Interpolated(InterpolatedStart, InterpolatedEnding), -} - pub(crate) struct Lexer<'a> { input: &'a str, len: u32, diff --git a/compiler/qsc_qasm3/src/lex/raw.rs b/compiler/qsc_qasm3/src/lex/raw.rs index 6a6b5e2abe..6751d5a6b2 100644 --- a/compiler/qsc_qasm3/src/lex/raw.rs +++ b/compiler/qsc_qasm3/src/lex/raw.rs @@ -48,7 +48,6 @@ impl Display for TokenKind { match self { TokenKind::Comment(CommentKind::Block) => f.write_str("block comment"), TokenKind::Comment(CommentKind::Normal) => f.write_str("comment"), - TokenKind::Comment(CommentKind::Doc) => f.write_str("doc comment"), TokenKind::Ident => f.write_str("identifier"), TokenKind::Number(Number::BigInt(_)) => f.write_str("big integer"), TokenKind::Number(Number::Float) => f.write_str("float"), @@ -163,7 +162,6 @@ enum StringKind { pub enum CommentKind { Block, Normal, - Doc, } #[derive(Clone)] @@ -225,13 +223,7 @@ impl<'a> Lexer<'a> { fn comment(&mut self, c: char) -> Option { if c == '/' && self.next_if_eq('/') { - let kind = if self.first() == Some('/') && self.second() != Some('/') { - self.chars.next(); - CommentKind::Doc - } else { - CommentKind::Normal - }; - + let kind = CommentKind::Normal; self.eat_while(|c| c != '\n' && c != '\r'); Some(kind) } else if c == '/' && self.next_if_eq('*') { From e7665785ec0d6f6121768f5694e573fb5cf5fef8 Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Tue, 28 Jan 2025 15:31:34 -0800 Subject: [PATCH 05/20] define cooked TokenKinds --- compiler/qsc_qasm3/src/lex/cooked.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/compiler/qsc_qasm3/src/lex/cooked.rs b/compiler/qsc_qasm3/src/lex/cooked.rs index b11d4eaea0..dd8e12dc29 100644 --- a/compiler/qsc_qasm3/src/lex/cooked.rs +++ b/compiler/qsc_qasm3/src/lex/cooked.rs @@ -80,8 +80,8 @@ impl Error { /// A token kind. 
#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] pub enum TokenKind { - Keyword, - Type, + Keyword(Keyword), + Type(Type), // Builtin identifiers and operations GPhase, @@ -96,11 +96,11 @@ pub enum TokenKind { Measure, Barrier, - Literal, + Literal(Literal), // Symbols - Open, - Close, + Open(Delim), + Close(Delim), // Punctuation Colon, @@ -109,8 +109,8 @@ pub enum TokenKind { Comma, // Operators, - UnaryOperator, - BinaryOperator, + UnaryOperator(UnaryOperator), + BinaryOperator(BinaryOperator), Identifier, HardwareQubit, From db48505c53468ec3d2456c5f8dbdfe1c892b3a07 Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Wed, 29 Jan 2025 01:42:01 -0800 Subject: [PATCH 06/20] cook most of the tokens (work in progress) --- compiler/qsc_qasm3/src/keyword.rs | 27 +- compiler/qsc_qasm3/src/lex.rs | 2 +- compiler/qsc_qasm3/src/lex/cooked.rs | 311 ++++++---- compiler/qsc_qasm3/src/lex/cooked/tests.rs | 181 ++---- compiler/qsc_qasm3/src/lex/raw.rs | 95 +--- compiler/qsc_qasm3/src/lex/raw/tests.rs | 626 +-------------------- compiler/qsc_qasm3/src/oqasm_ast.rs | 10 +- 7 files changed, 279 insertions(+), 973 deletions(-) diff --git a/compiler/qsc_qasm3/src/keyword.rs b/compiler/qsc_qasm3/src/keyword.rs index d6f7783ad6..47392c615f 100644 --- a/compiler/qsc_qasm3/src/keyword.rs +++ b/compiler/qsc_qasm3/src/keyword.rs @@ -55,6 +55,31 @@ impl FromStr for Keyword { // can optimize the string comparisons better, and order the cases by // frequency in Q# so that fewer comparisons are needed on average. fn from_str(s: &str) -> Result { - todo!() + match s { + "openqasm" => Ok(Self::OpenQASM), + "include" => Ok(Self::Include), + "defcalgrammar" => Ok(Self::Defcalgrammar), + "def" => Ok(Self::Def), + "cal" => Ok(Self::Cal), + "gate" => Ok(Self::Gate), + "extern" => Ok(Self::Extern), + "box" => Ok(Self::Box), + "let" => Ok(Self::Let), + "break" => Ok(Self::Break), + "continue" => Ok(Self::Continue), + "if" => Ok(Self::If), + "else" => Ok(Self::Else), + "end" => Ok(Self::End), + "return" => Ok(Self::Return), + "for" => Ok(Self::For), + "while" => Ok(Self::While), + "in" => Ok(Self::In), + "switch" => Ok(Self::Switch), + "case" => Ok(Self::Case), + "default" => Ok(Self::Default), + "pragma" => Ok(Self::Pragma), + "annotation" => Ok(Self::Annotation), + _ => Err(()), + } } } diff --git a/compiler/qsc_qasm3/src/lex.rs b/compiler/qsc_qasm3/src/lex.rs index be3f8849f8..df04e5d460 100644 --- a/compiler/qsc_qasm3/src/lex.rs +++ b/compiler/qsc_qasm3/src/lex.rs @@ -6,7 +6,7 @@ pub mod raw; use enum_iterator::Sequence; -pub(super) use cooked::{ClosedBinOp, Error, Lexer, StringToken, Token, TokenKind}; +pub(super) use cooked::{Error, Lexer, Token, TokenKind}; /// A delimiter token. #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] diff --git a/compiler/qsc_qasm3/src/lex/cooked.rs b/compiler/qsc_qasm3/src/lex/cooked.rs index dd8e12dc29..ef6e485368 100644 --- a/compiler/qsc_qasm3/src/lex/cooked.rs +++ b/compiler/qsc_qasm3/src/lex/cooked.rs @@ -2,8 +2,8 @@ // Licensed under the MIT License. //! The second lexing phase "cooks" a raw token stream, transforming them into tokens that directly -//! correspond to components in the Q# grammar. Keywords are treated as identifiers, except `and` -//! and `or`, which are cooked into [`ClosedBinOp`] so that `and=` and `or=` are lexed correctly. +//! correspond to components in the `OpenQASM` grammar. Keywords are treated as identifiers, except `and` +//! 
and `or`, which are cooked into [`BinaryOperator`] so that `and=` and `or=` are lexed correctly.
 //!
 //! Whitespace and comment tokens are discarded; this means that cooked tokens are not necessarily
 //! contiguous, so they include both a starting and ending byte offset.
@@ -16,7 +16,7 @@ mod tests;
 
 use super::{
     raw::{self, Number, Single},
-    Delim, InterpolatedEnding, InterpolatedStart, Radix,
+    Delim, Radix,
 };
 use crate::keyword::Keyword;
 use enum_iterator::Sequence;
@@ -99,18 +99,31 @@ pub enum TokenKind {
     Literal(Literal),
 
     // Symbols
+    /// `{[(`
     Open(Delim),
+    /// `}])`
     Close(Delim),
 
     // Punctuation
+    /// `:`
     Colon,
+    /// `;`
     Semicolon,
+    /// `.`
     Dot,
+    /// `,`
     Comma,
+    /// `++`
+    PlusPlus,
+    /// `->`
+    Arrow,
 
     // Operators,
     UnaryOperator(UnaryOperator),
-    BinaryOperator(BinaryOperator),
+    BinaryOperator(ClosedBinaryOperator),
+    BinaryOperatorEq(ClosedBinaryOperator),
+    ComparisonOperator(ComparisonOperator),
+    Eq,
 
     Identifier,
     HardwareQubit,
@@ -119,6 +132,104 @@ pub enum TokenKind {
     Comment,
 }
 
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)]
+pub enum Type {
+    Input,
+    Output,
+    Const,
+    Readonly,
+    Mutable,
+
+    QReg,
+    Qubit,
+
+    CReg,
+    Bool,
+    Bit,
+    Int,
+    UInt,
+    Float,
+    Angle,
+    Complex,
+    Array,
+    Void,
+
+    Duration,
+    Stretch,
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)]
+pub enum Literal {
+    Bitstring,
+    Boolean,
+    Float,
+    Imaginary,
+    Integer(Radix),
+    String,
+    Timing,
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)]
+pub enum UnaryOperator {
+    /// `!`
+    Bang,
+    /// `-`
+    Minus,
+    /// `~`
+    Tilde,
+}
+
+/// A binary operator that returns the same type as the type of its first operand; in other words,
+/// the domain of the first operand is closed under this operation. These are candidates for
+/// compound assignment operators, like `+=`.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)]
+pub enum ClosedBinaryOperator {
+    /// `&`
+    Amp,
+    /// `|`
+    Bar,
+    /// `^`
+    Caret,
+    /// `>>`
+    GtGt,
+    /// `<<`
+    LtLt,
+    /// `-`
+    Minus,
+    /// `%`
+    Percent,
+    /// `+`
+    Plus,
+    /// `/`
+    Slash,
+    /// `*`
+    Star,
+    /// `**`
+    StarStar,
+    // TODO: missing Tilde according to qasm3Lexer.g4 to be able to express ~=
+    // But is this a bug in the official qasm lexer?
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)]
+pub enum ComparisonOperator {
+    /// `&&`
+    AmpAmp,
+    /// `!=`
+    BangEq,
+    /// `||`
+    BarBar,
+    /// `==`
+    EqEq,
+    /// `>`
+    Gt,
+    /// `>=`
+    GtEq,
+    /// `<`
+    Lt,
+    /// `<=`
+    LtEq,
+}
+
 impl Display for TokenKind {
     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
         todo!()
@@ -128,29 +239,15 @@ impl Display for TokenKind {
 impl From for TokenKind {
     fn from(value: Number) -> Self {
         match value {
-            Number::BigInt(radix) => Self::BigInt(radix),
-            Number::Float => Self::Float,
-            Number::Int(radix) => Self::Int(radix),
+            Number::Float => Self::Literal(Literal::Float),
+            Number::Int(radix) => Self::Literal(Literal::Integer(radix)),
         }
     }
 }
 
-/// A binary operator that returns the same type as the type of its first operand; in other words,
-/// the domain of the first operand is closed under this operation. These are candidates for
-/// compound assignment operators, like `+=`. 
-#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub enum ClosedBinOp {} - -impl Display for ClosedBinOp { +impl Display for ClosedBinaryOperator { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - f.write_str(match self { - ClosedBinOp::Caret => "^", - ClosedBinOp::Minus => "-", - ClosedBinOp::Percent => "%", - ClosedBinOp::Plus => "+", - ClosedBinOp::Slash => "/", - ClosedBinOp::Star => "*", - }) + todo!() } } @@ -209,26 +306,21 @@ impl<'a> Lexer<'a> { let kind = match token.kind { raw::TokenKind::Comment(raw::CommentKind::Block | raw::CommentKind::Normal) | raw::TokenKind::Whitespace => Ok(None), - raw::TokenKind::Comment(raw::CommentKind::Doc) => Ok(Some(TokenKind::DocComment)), raw::TokenKind::Ident => { let ident = &self.input[(token.offset as usize)..(self.offset() as usize)]; Ok(Some(self.ident(ident))) } raw::TokenKind::Number(number) => Ok(Some(number.into())), raw::TokenKind::Single(single) => self.single(single).map(Some), - raw::TokenKind::String(raw::StringToken::Normal { terminated: true }) => { - Ok(Some(TokenKind::String(StringToken::Normal))) + raw::TokenKind::String(raw::StringToken { terminated: true }) => { + Ok(Some(TokenKind::Literal(Literal::String))) + } + raw::TokenKind::String(raw::StringToken { terminated: false }) => { + Err(Error::UnterminatedString(Span { + lo: token.offset, + hi: token.offset, + })) } - raw::TokenKind::String(raw::StringToken::Interpolated(start, Some(ending))) => Ok( - Some(TokenKind::String(StringToken::Interpolated(start, ending))), - ), - raw::TokenKind::String( - raw::StringToken::Normal { terminated: false } - | raw::StringToken::Interpolated(_, None), - ) => Err(Error::UnterminatedString(Span { - lo: token.offset, - hi: token.offset, - })), raw::TokenKind::Unknown => { let c = self.input[(token.offset as usize)..] .chars() @@ -251,149 +343,136 @@ impl<'a> Lexer<'a> { })) } + /// Consumes a list of tokens zero or more times. + fn kleen_star(&mut self, tokens: &[raw::TokenKind], complete: TokenKind) -> Result<(), Error> { + let mut iter = tokens.iter(); + while self.next_if_eq(*(iter.next().expect("tokens should have at least one token"))) { + for token in iter { + self.expect(*token, complete)? 
+ } + iter = tokens.iter(); + } + Ok(()) + } + #[allow(clippy::too_many_lines)] fn single(&mut self, single: Single) -> Result { match single { Single::Amp => { - let op = ClosedBinOp::AmpAmpAmp; - self.expect_single(Single::Amp, TokenKind::ClosedBinOp(op))?; - self.expect_single(Single::Amp, TokenKind::ClosedBinOp(op))?; - Ok(self.closed_bin_op(op)) + if self.next_if_eq_single(Single::Amp) { + Ok(TokenKind::ComparisonOperator(ComparisonOperator::AmpAmp)) + } else { + Ok(self.closed_bin_op(ClosedBinaryOperator::Amp)) + } } - Single::Apos => { - self.expect(raw::TokenKind::Ident, TokenKind::AposIdent)?; - Ok(TokenKind::AposIdent) + Single::At => { + let complete = TokenKind::Keyword(Keyword::Annotation); + self.expect(raw::TokenKind::Ident, complete)?; + self.kleen_star( + &[raw::TokenKind::Single(Single::Dot), raw::TokenKind::Ident], + complete, + )?; + Ok(TokenKind::Keyword(Keyword::Annotation)) } - Single::At => Ok(TokenKind::At), Single::Bang => { if self.next_if_eq_single(Single::Eq) { - Ok(TokenKind::Ne) + Ok(TokenKind::ComparisonOperator(ComparisonOperator::BangEq)) } else { - Ok(TokenKind::Bang) + Ok(TokenKind::UnaryOperator(UnaryOperator::Bang)) } } Single::Bar => { if self.next_if_eq_single(Single::Bar) { - let op = ClosedBinOp::BarBarBar; - self.expect_single(Single::Bar, TokenKind::ClosedBinOp(op))?; - Ok(self.closed_bin_op(op)) - } else { - Ok(TokenKind::Bar) - } - } - Single::Caret => { - if self.next_if_eq_single(Single::Caret) { - let op = ClosedBinOp::CaretCaretCaret; - self.expect_single(Single::Caret, TokenKind::ClosedBinOp(op))?; - Ok(self.closed_bin_op(op)) + Ok(TokenKind::ComparisonOperator(ComparisonOperator::BarBar)) } else { - Ok(self.closed_bin_op(ClosedBinOp::Caret)) + Ok(self.closed_bin_op(ClosedBinaryOperator::Bar)) } } + Single::Caret => Ok(self.closed_bin_op(ClosedBinaryOperator::Caret)), Single::Close(delim) => Ok(TokenKind::Close(delim)), - Single::Colon => { - if self.next_if_eq_single(Single::Colon) { - Ok(TokenKind::ColonColon) - } else { - Ok(TokenKind::Colon) - } - } + Single::Colon => Ok(TokenKind::Colon), Single::Comma => Ok(TokenKind::Comma), - Single::Dot => { - if self.next_if_eq_single(Single::Dot) { - if self.next_if_eq_single(Single::Dot) { - Ok(TokenKind::DotDotDot) - } else { - Ok(TokenKind::DotDot) - } - } else { - Ok(TokenKind::Dot) - } - } + Single::Dot => Ok(TokenKind::Dot), Single::Eq => { if self.next_if_eq_single(Single::Eq) { - Ok(TokenKind::EqEq) - } else if self.next_if_eq_single(Single::Gt) { - Ok(TokenKind::FatArrow) + Ok(TokenKind::ComparisonOperator(ComparisonOperator::EqEq)) } else { Ok(TokenKind::Eq) } } Single::Gt => { if self.next_if_eq_single(Single::Eq) { - Ok(TokenKind::Gte) + Ok(TokenKind::ComparisonOperator(ComparisonOperator::GtEq)) } else if self.next_if_eq_single(Single::Gt) { - let op = ClosedBinOp::GtGtGt; - self.expect_single(Single::Gt, TokenKind::ClosedBinOp(op))?; - Ok(self.closed_bin_op(op)) + Ok(self.closed_bin_op(ClosedBinaryOperator::GtGt)) } else { - Ok(TokenKind::Gt) + Ok(TokenKind::ComparisonOperator(ComparisonOperator::Gt)) } } Single::Lt => { if self.next_if_eq_single(Single::Eq) { - Ok(TokenKind::Lte) - } else if self.next_if_eq_single(Single::Minus) { - Ok(TokenKind::LArrow) + Ok(TokenKind::ComparisonOperator(ComparisonOperator::LtEq)) } else if self.next_if_eq_single(Single::Lt) { - let op = ClosedBinOp::LtLtLt; - self.expect_single(Single::Lt, TokenKind::ClosedBinOp(op))?; - Ok(self.closed_bin_op(op)) + Ok(self.closed_bin_op(ClosedBinaryOperator::LtLt)) } else { - Ok(TokenKind::Lt) + 
Ok(TokenKind::ComparisonOperator(ComparisonOperator::Lt)) } } Single::Minus => { if self.next_if_eq_single(Single::Gt) { - Ok(TokenKind::RArrow) + Ok(TokenKind::Arrow) } else { - Ok(self.closed_bin_op(ClosedBinOp::Minus)) + Ok(self.closed_bin_op(ClosedBinaryOperator::Minus)) } } Single::Open(delim) => Ok(TokenKind::Open(delim)), - Single::Percent => Ok(self.closed_bin_op(ClosedBinOp::Percent)), + Single::Percent => Ok(self.closed_bin_op(ClosedBinaryOperator::Percent)), Single::Plus => { if self.next_if_eq_single(Single::Plus) { Ok(TokenKind::PlusPlus) } else { - Ok(self.closed_bin_op(ClosedBinOp::Plus)) + Ok(self.closed_bin_op(ClosedBinaryOperator::Plus)) } } - Single::Semi => Ok(TokenKind::Semi), - Single::Slash => Ok(self.closed_bin_op(ClosedBinOp::Slash)), - Single::Star => Ok(self.closed_bin_op(ClosedBinOp::Star)), - Single::Tilde => { - let complete = TokenKind::TildeTildeTilde; - self.expect_single(Single::Tilde, complete)?; - self.expect_single(Single::Tilde, complete)?; - Ok(complete) + Single::Semi => Ok(TokenKind::Semicolon), + Single::Slash => Ok(self.closed_bin_op(ClosedBinaryOperator::Slash)), + Single::Star => { + if self.next_if_eq_single(Single::Star) { + Ok(self.closed_bin_op(ClosedBinaryOperator::StarStar)) + } else { + Ok(self.closed_bin_op(ClosedBinaryOperator::Star)) + } } + Single::Tilde => Ok(TokenKind::UnaryOperator(UnaryOperator::Tilde)), } } - fn closed_bin_op(&mut self, op: ClosedBinOp) -> TokenKind { + fn closed_bin_op(&mut self, op: ClosedBinaryOperator) -> TokenKind { if self.next_if_eq_single(Single::Eq) { - TokenKind::BinOpEq(op) + TokenKind::BinaryOperatorEq(op) } else { - TokenKind::ClosedBinOp(op) + TokenKind::BinaryOperator(op) } } fn ident(&mut self, ident: &str) -> TokenKind { match ident { - "and" => self.closed_bin_op(ClosedBinOp::And), - "or" => self.closed_bin_op(ClosedBinOp::Or), - "w" if self.next_if_eq_single(Single::Slash) => { - if self.next_if_eq_single(Single::Eq) { - TokenKind::WSlashEq - } else { - TokenKind::WSlash - } - } + "gphase" => TokenKind::GPhase, + "inv" => TokenKind::Inv, + "pow" => TokenKind::Pow, + "ctrl" => TokenKind::Ctrl, + "negctrl" => TokenKind::NegCtrl, + "dim" => TokenKind::Dim, + "durationof" => TokenKind::DurationOf, + "delay" => TokenKind::Delay, + "reset" => TokenKind::Reset, + "measure" => TokenKind::Measure, + "barrier" => TokenKind::Barrier, + "false" | "true" => TokenKind::Literal(Literal::Boolean), ident => ident .parse() .map(TokenKind::Keyword) - .unwrap_or(TokenKind::Ident), + .unwrap_or(TokenKind::Identifier), } } } diff --git a/compiler/qsc_qasm3/src/lex/cooked/tests.rs b/compiler/qsc_qasm3/src/lex/cooked/tests.rs index 3065d6fd48..1bebf33bf7 100644 --- a/compiler/qsc_qasm3/src/lex/cooked/tests.rs +++ b/compiler/qsc_qasm3/src/lex/cooked/tests.rs @@ -13,47 +13,41 @@ fn check(input: &str, expect: &Expect) { fn op_string(kind: TokenKind) -> Option { match kind { - TokenKind::AposIdent => Some("'T".to_string()), - TokenKind::At => Some("@".to_string()), - TokenKind::Bang => Some("!".to_string()), - TokenKind::Bar => Some("|".to_string()), - TokenKind::BinOpEq(op) => Some(format!("{op}=")), TokenKind::Close(Delim::Brace) => Some("}".to_string()), TokenKind::Close(Delim::Bracket) => Some("]".to_string()), TokenKind::Close(Delim::Paren) => Some(")".to_string()), - TokenKind::ClosedBinOp(op) => Some(op.to_string()), TokenKind::Colon => Some(":".to_string()), - TokenKind::ColonColon => Some("::".to_string()), TokenKind::Comma => Some(",".to_string()), TokenKind::Dot => Some(".".to_string()), - TokenKind::DotDot => 
Some("..".to_string()), - TokenKind::DotDotDot => Some("...".to_string()), TokenKind::Eq => Some("=".to_string()), - TokenKind::EqEq => Some("==".to_string()), - TokenKind::FatArrow => Some("=>".to_string()), - TokenKind::Gt => Some(">".to_string()), - TokenKind::Gte => Some(">=".to_string()), - TokenKind::LArrow => Some("<-".to_string()), - TokenKind::Lt => Some("<".to_string()), - TokenKind::Lte => Some("<=".to_string()), - TokenKind::Ne => Some("!=".to_string()), TokenKind::Open(Delim::Brace) => Some("{".to_string()), TokenKind::Open(Delim::Bracket) => Some("[".to_string()), TokenKind::Open(Delim::Paren) => Some("(".to_string()), TokenKind::PlusPlus => Some("++".to_string()), - TokenKind::RArrow => Some("->".to_string()), - TokenKind::Semi => Some(";".to_string()), - TokenKind::TildeTildeTilde => Some("~~~".to_string()), - TokenKind::WSlash => Some("w/".to_string()), - TokenKind::WSlashEq => Some("w/=".to_string()), - TokenKind::BigInt(_) - | TokenKind::DocComment - | TokenKind::Eof - | TokenKind::Float - | TokenKind::Ident - | TokenKind::Int(_) - | TokenKind::Keyword(_) - | TokenKind::String(_) => None, + TokenKind::Keyword(_) => None, + TokenKind::Type(_) => todo!(), + TokenKind::GPhase => todo!(), + TokenKind::Inv => todo!(), + TokenKind::Pow => todo!(), + TokenKind::Ctrl => todo!(), + TokenKind::NegCtrl => todo!(), + TokenKind::Dim => todo!(), + TokenKind::DurationOf => todo!(), + TokenKind::Delay => todo!(), + TokenKind::Reset => todo!(), + TokenKind::Measure => todo!(), + TokenKind::Barrier => todo!(), + TokenKind::Literal(literal) => todo!(), + TokenKind::Semicolon => todo!(), + TokenKind::Arrow => todo!(), + TokenKind::UnaryOperator(unary_operator) => todo!(), + TokenKind::BinaryOperator(closed_binary_operator) => todo!(), + TokenKind::BinaryOperatorEq(closed_binary_operator) => todo!(), + TokenKind::ComparisonOperator(comparison_operator) => todo!(), + TokenKind::Identifier => todo!(), + TokenKind::HardwareQubit => todo!(), + TokenKind::Whitespace => todo!(), + TokenKind::Comment => todo!(), } } @@ -183,20 +177,16 @@ fn amp_multibyte() { "&🦀", &expect![[r#" [ - Err( - Incomplete( - Single( + Ok( + Token { + kind: BinaryOperator( Amp, ), - ClosedBinOp( - AmpAmpAmp, - ), - Unknown, - Span { - lo: 1, - hi: 5, + span: Span { + lo: 0, + hi: 1, }, - ), + }, ), Err( Unknown( @@ -213,9 +203,9 @@ fn amp_multibyte() { } #[test] -fn amp_amp_amp_amp_amp_amp() { +fn amp_amp_amp_amp() { check( - "&&&&&&", + "&&&&", &expect![[r#" [ Ok( @@ -245,111 +235,6 @@ fn amp_amp_amp_amp_amp_amp() { ); } -#[test] -fn caret_caret() { - check( - "^^", - &expect![[r#" - [ - Err( - IncompleteEof( - Single( - Caret, - ), - ClosedBinOp( - CaretCaretCaret, - ), - Span { - lo: 2, - hi: 2, - }, - ), - ), - ] - "#]], - ); -} - -#[test] -fn and_ws_eq() { - check( - "and =", - &expect![[r#" - [ - Ok( - Token { - kind: ClosedBinOp( - And, - ), - span: Span { - lo: 0, - hi: 3, - }, - }, - ), - Ok( - Token { - kind: Eq, - span: Span { - lo: 4, - hi: 5, - }, - }, - ), - ] - "#]], - ); -} - -#[test] -fn w() { - check( - "w", - &expect![[r#" - [ - Ok( - Token { - kind: Ident, - span: Span { - lo: 0, - hi: 1, - }, - }, - ), - ] - "#]], - ); -} - -#[test] -fn w_slash_eq_ident() { - check( - "w/=foo", - &expect![[r#" - [ - Ok( - Token { - kind: WSlashEq, - span: Span { - lo: 0, - hi: 3, - }, - }, - ), - Ok( - Token { - kind: Ident, - span: Span { - lo: 3, - hi: 6, - }, - }, - ), - ] - "#]], - ); -} - #[test] fn int() { check( diff --git a/compiler/qsc_qasm3/src/lex/raw.rs b/compiler/qsc_qasm3/src/lex/raw.rs index 
6751d5a6b2..c983536ac7 100644 --- a/compiler/qsc_qasm3/src/lex/raw.rs +++ b/compiler/qsc_qasm3/src/lex/raw.rs @@ -5,7 +5,7 @@ //! whitespace, and comments. Keywords are treated as identifiers. The raw token stream is //! contiguous: there are no gaps between tokens. //! -//! These are "raw" tokens because single-character operators don't always correspond to Q# +//! These are "raw" tokens because single-character operators don't always correspond to `OpenQASM` //! operators, and whitespace and comments will later be discarded. Raw tokens are the ingredients //! that are "cooked" into compound tokens before they can be consumed by the parser. //! @@ -15,7 +15,7 @@ #[cfg(test)] mod tests; -use super::{Delim, InterpolatedEnding, InterpolatedStart, Radix}; +use super::{Delim, Radix}; use enum_iterator::Sequence; use std::{ fmt::{self, Display, Formatter, Write}, @@ -49,7 +49,6 @@ impl Display for TokenKind { TokenKind::Comment(CommentKind::Block) => f.write_str("block comment"), TokenKind::Comment(CommentKind::Normal) => f.write_str("comment"), TokenKind::Ident => f.write_str("identifier"), - TokenKind::Number(Number::BigInt(_)) => f.write_str("big integer"), TokenKind::Number(Number::Float) => f.write_str("float"), TokenKind::Number(Number::Int(_)) => f.write_str("integer"), TokenKind::Single(single) => write!(f, "`{single}`"), @@ -65,8 +64,6 @@ impl Display for TokenKind { pub enum Single { /// `&` Amp, - /// `'` - Apos, /// `@` At, /// `!` @@ -111,7 +108,6 @@ impl Display for Single { fn fmt(&self, f: &mut Formatter) -> fmt::Result { f.write_char(match self { Single::Amp => '&', - Single::Apos => '\'', Single::At => '@', Single::Bang => '!', Single::Bar => '|', @@ -141,21 +137,13 @@ impl Display for Single { #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] pub enum Number { - BigInt(Radix), Float, Int(Radix), } #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub enum StringToken { - Normal { terminated: bool }, - Interpolated(InterpolatedStart, Option), -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum StringKind { - Normal, - Interpolated, +pub struct StringToken { + pub terminated: bool, } #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] @@ -167,7 +155,6 @@ pub enum CommentKind { #[derive(Clone)] pub struct Lexer<'a> { chars: Peekable>, - interpolation: u8, starting_offset: u32, } @@ -176,7 +163,6 @@ impl<'a> Lexer<'a> { pub fn new(input: &'a str) -> Self { Self { chars: input.char_indices().peekable(), - interpolation: 0, starting_offset: 0, } } @@ -185,7 +171,6 @@ impl<'a> Lexer<'a> { pub fn new_with_starting_offset(input: &'a str, starting_offset: u32) -> Self { Self { chars: input.char_indices().peekable(), - interpolation: 0, starting_offset, } } @@ -267,9 +252,7 @@ impl<'a> Lexer<'a> { }; self.eat_while(|c| c == '_' || c.is_digit(radix.into())); - if self.next_if_eq('L') { - Some(Number::BigInt(radix)) - } else if radix == Radix::Decimal && self.float() { + if radix == Radix::Decimal && self.float() { Some(Number::Float) } else { Some(Number::Int(radix)) @@ -285,8 +268,6 @@ impl<'a> Lexer<'a> { if self.float() { Some(Number::Float) - } else if self.next_if_eq('L') { - Some(Number::BigInt(Radix::Decimal)) } else { Some(Number::Int(Radix::Decimal)) } @@ -315,65 +296,20 @@ impl<'a> Lexer<'a> { } fn string(&mut self, c: char) -> Option { - let kind = self.start_string(c)?; + if c != '"' { + return None; + } - while self.first().is_some_and(|c| !is_string_terminator(kind, c)) { - self.eat_while(|c| c != '\\' && !is_string_terminator(kind, c)); + while 
self.first().is_some_and(|c| c != '"') { + self.eat_while(|c| c != '\\' && c != '"'); if self.next_if_eq('\\') { self.chars.next(); } } - Some(TokenKind::String(self.finish_string(c, kind))) - } - - fn start_string(&mut self, c: char) -> Option { - if c == '$' { - if self.next_if_eq('"') { - Some(StringKind::Interpolated) - } else { - None - } - } else if c == '"' { - Some(StringKind::Normal) - } else if self.interpolation > 0 && c == '}' { - self.interpolation = self - .interpolation - .checked_sub(1) - .expect("interpolation level should have been incremented at left brace"); - Some(StringKind::Interpolated) - } else { - None - } - } - - fn finish_string(&mut self, start: char, kind: StringKind) -> StringToken { - match kind { - StringKind::Normal => StringToken::Normal { - terminated: self.next_if_eq('"'), - }, - StringKind::Interpolated => { - let start = if start == '$' { - InterpolatedStart::DollarQuote - } else { - InterpolatedStart::RBrace - }; - - let end = if self.next_if_eq('{') { - self.interpolation = self - .interpolation - .checked_add(1) - .expect("interpolation should not exceed maximum depth"); - Some(InterpolatedEnding::LBrace) - } else if self.next_if_eq('"') { - Some(InterpolatedEnding::Quote) - } else { - None // Unterminated string. - }; - - StringToken::Interpolated(start, end) - } - } + Some(TokenKind::String(StringToken { + terminated: self.next_if_eq('"'), + })) } } @@ -411,7 +347,6 @@ fn single(c: char) -> Option { ':' => Some(Single::Colon), '!' => Some(Single::Bang), '.' => Some(Single::Dot), - '\'' => Some(Single::Apos), '(' => Some(Single::Open(Delim::Paren)), ')' => Some(Single::Close(Delim::Paren)), '[' => Some(Single::Open(Delim::Bracket)), @@ -433,7 +368,3 @@ fn single(c: char) -> Option { _ => None, } } - -fn is_string_terminator(kind: StringKind, c: char) -> bool { - c == '"' || kind == StringKind::Interpolated && c == '{' -} diff --git a/compiler/qsc_qasm3/src/lex/raw/tests.rs b/compiler/qsc_qasm3/src/lex/raw/tests.rs index 1a8e014ee9..cf24c86747 100644 --- a/compiler/qsc_qasm3/src/lex/raw/tests.rs +++ b/compiler/qsc_qasm3/src/lex/raw/tests.rs @@ -118,25 +118,17 @@ fn comment() { } #[test] -fn doc_comment() { +fn block_comment() { check( - "///comment\nx", + "/* comment\n x */", &expect![[r#" [ Token { kind: Comment( - Doc, + Block, ), offset: 0, }, - Token { - kind: Whitespace, - offset: 10, - }, - Token { - kind: Ident, - offset: 11, - }, ] "#]], ); @@ -175,7 +167,7 @@ fn string() { [ Token { kind: String( - Normal { + StringToken { terminated: true, }, ), @@ -194,7 +186,7 @@ fn string_escape_quote() { [ Token { kind: String( - Normal { + StringToken { terminated: true, }, ), @@ -213,7 +205,7 @@ fn string_missing_ending() { [ Token { kind: String( - Normal { + StringToken { terminated: false, }, ), @@ -224,574 +216,6 @@ fn string_missing_ending() { ); } -#[test] -fn interpolated_string_missing_ending() { - check( - r#"$"string"#, - &expect![[r#" - [ - Token { - kind: String( - Interpolated( - DollarQuote, - None, - ), - ), - offset: 0, - }, - ] - "#]], - ); -} - -#[test] -fn interpolated_string() { - check( - r#"$"string""#, - &expect![[r#" - [ - Token { - kind: String( - Interpolated( - DollarQuote, - Some( - Quote, - ), - ), - ), - offset: 0, - }, - ] - "#]], - ); -} - -#[test] -fn interpolated_string_braced() { - check( - r#"$"{x}""#, - &expect![[r#" - [ - Token { - kind: String( - Interpolated( - DollarQuote, - Some( - LBrace, - ), - ), - ), - offset: 0, - }, - Token { - kind: Ident, - offset: 3, - }, - Token { - kind: String( - Interpolated( - 
RBrace, - Some( - Quote, - ), - ), - ), - offset: 4, - }, - ] - "#]], - ); -} - -#[test] -fn interpolated_string_escape_brace() { - check( - r#"$"\{""#, - &expect![[r#" - [ - Token { - kind: String( - Interpolated( - DollarQuote, - Some( - Quote, - ), - ), - ), - offset: 0, - }, - ] - "#]], - ); -} - -#[test] -fn interpolated_string_unclosed_brace() { - check( - r#"$"{"#, - &expect![[r#" - [ - Token { - kind: String( - Interpolated( - DollarQuote, - Some( - LBrace, - ), - ), - ), - offset: 0, - }, - ] - "#]], - ); -} - -#[test] -fn interpolated_string_unclosed_brace_quote() { - check( - r#"$"{""#, - &expect![[r#" - [ - Token { - kind: String( - Interpolated( - DollarQuote, - Some( - LBrace, - ), - ), - ), - offset: 0, - }, - Token { - kind: String( - Normal { - terminated: false, - }, - ), - offset: 3, - }, - ] - "#]], - ); -} - -#[test] -fn interpolated_string_unopened_brace() { - check( - r#"$"}"#, - &expect![[r#" - [ - Token { - kind: String( - Interpolated( - DollarQuote, - None, - ), - ), - offset: 0, - }, - ] - "#]], - ); -} - -#[test] -fn interpolated_string_unopened_brace_quote() { - check( - r#"$"}""#, - &expect![[r#" - [ - Token { - kind: String( - Interpolated( - DollarQuote, - Some( - Quote, - ), - ), - ), - offset: 0, - }, - ] - "#]], - ); -} - -#[test] -fn interpolated_string_braced_index() { - check( - r#"$"{xs[0]}""#, - &expect![[r#" - [ - Token { - kind: String( - Interpolated( - DollarQuote, - Some( - LBrace, - ), - ), - ), - offset: 0, - }, - Token { - kind: Ident, - offset: 3, - }, - Token { - kind: Single( - Open( - Bracket, - ), - ), - offset: 5, - }, - Token { - kind: Number( - Int( - Decimal, - ), - ), - offset: 6, - }, - Token { - kind: Single( - Close( - Bracket, - ), - ), - offset: 7, - }, - Token { - kind: String( - Interpolated( - RBrace, - Some( - Quote, - ), - ), - ), - offset: 8, - }, - ] - "#]], - ); -} - -#[test] -fn interpolated_string_two_braced() { - check( - r#"$"{x} {y}""#, - &expect![[r#" - [ - Token { - kind: String( - Interpolated( - DollarQuote, - Some( - LBrace, - ), - ), - ), - offset: 0, - }, - Token { - kind: Ident, - offset: 3, - }, - Token { - kind: String( - Interpolated( - RBrace, - Some( - LBrace, - ), - ), - ), - offset: 4, - }, - Token { - kind: Ident, - offset: 7, - }, - Token { - kind: String( - Interpolated( - RBrace, - Some( - Quote, - ), - ), - ), - offset: 8, - }, - ] - "#]], - ); -} - -#[test] -fn interpolated_string_braced_normal_string() { - check( - r#"$"{"{}"}""#, - &expect![[r#" - [ - Token { - kind: String( - Interpolated( - DollarQuote, - Some( - LBrace, - ), - ), - ), - offset: 0, - }, - Token { - kind: String( - Normal { - terminated: true, - }, - ), - offset: 3, - }, - Token { - kind: String( - Interpolated( - RBrace, - Some( - Quote, - ), - ), - ), - offset: 7, - }, - ] - "#]], - ); -} - -#[test] -fn nested_interpolated_string() { - check( - r#"$"{$"{x}"}""#, - &expect![[r#" - [ - Token { - kind: String( - Interpolated( - DollarQuote, - Some( - LBrace, - ), - ), - ), - offset: 0, - }, - Token { - kind: String( - Interpolated( - DollarQuote, - Some( - LBrace, - ), - ), - ), - offset: 3, - }, - Token { - kind: Ident, - offset: 6, - }, - Token { - kind: String( - Interpolated( - RBrace, - Some( - Quote, - ), - ), - ), - offset: 7, - }, - Token { - kind: String( - Interpolated( - RBrace, - Some( - Quote, - ), - ), - ), - offset: 9, - }, - ] - "#]], - ); -} - -#[test] -fn nested_interpolated_string_with_exprs() { - check( - r#"$"foo {x + $"bar {y}"} baz""#, - &expect![[r#" - [ - Token { - kind: String( - Interpolated( - 
DollarQuote, - Some( - LBrace, - ), - ), - ), - offset: 0, - }, - Token { - kind: Ident, - offset: 7, - }, - Token { - kind: Whitespace, - offset: 8, - }, - Token { - kind: Single( - Plus, - ), - offset: 9, - }, - Token { - kind: Whitespace, - offset: 10, - }, - Token { - kind: String( - Interpolated( - DollarQuote, - Some( - LBrace, - ), - ), - ), - offset: 11, - }, - Token { - kind: Ident, - offset: 18, - }, - Token { - kind: String( - Interpolated( - RBrace, - Some( - Quote, - ), - ), - ), - offset: 19, - }, - Token { - kind: String( - Interpolated( - RBrace, - Some( - Quote, - ), - ), - ), - offset: 21, - }, - ] - "#]], - ); -} - -#[test] -fn nested_interpolated_string_followed_by_braces() { - check( - r#"$"{$"{x}"}" {y}"#, - &expect![[r#" - [ - Token { - kind: String( - Interpolated( - DollarQuote, - Some( - LBrace, - ), - ), - ), - offset: 0, - }, - Token { - kind: String( - Interpolated( - DollarQuote, - Some( - LBrace, - ), - ), - ), - offset: 3, - }, - Token { - kind: Ident, - offset: 6, - }, - Token { - kind: String( - Interpolated( - RBrace, - Some( - Quote, - ), - ), - ), - offset: 7, - }, - Token { - kind: String( - Interpolated( - RBrace, - Some( - Quote, - ), - ), - ), - offset: 9, - }, - Token { - kind: Whitespace, - offset: 11, - }, - Token { - kind: Single( - Open( - Brace, - ), - ), - offset: 12, - }, - Token { - kind: Ident, - offset: 13, - }, - Token { - kind: Single( - Close( - Brace, - ), - ), - offset: 14, - }, - ] - "#]], - ); -} - #[test] fn binary() { check( @@ -1046,25 +470,6 @@ fn hexadecimal() { ); } -#[test] -fn bigint() { - check( - "123L", - &expect![[r#" - [ - Token { - kind: Number( - BigInt( - Decimal, - ), - ), - offset: 0, - }, - ] - "#]], - ); -} - #[test] fn negative() { check( @@ -1115,25 +520,6 @@ fn positive() { ); } -#[test] -fn bigint_hexadecimal() { - check( - "0x123abcL", - &expect![[r#" - [ - Token { - kind: Number( - BigInt( - Hexadecimal, - ), - ), - offset: 0, - }, - ] - "#]], - ); -} - #[test] fn float() { check( diff --git a/compiler/qsc_qasm3/src/oqasm_ast.rs b/compiler/qsc_qasm3/src/oqasm_ast.rs index 3790b56410..e86ee1e170 100644 --- a/compiler/qsc_qasm3/src/oqasm_ast.rs +++ b/compiler/qsc_qasm3/src/oqasm_ast.rs @@ -480,13 +480,13 @@ struct Literal { } enum LiteralKind { - Integer(i64), - Float(f64), - Imaginary(f64), - Boolean(bool), + Array(List), Bitstring { value: usize, width: u32 }, + Boolean(bool), Duration { value: f64, unit: TimeUnit }, - Array(List), + Float(f64), + Imaginary(f64), + Integer(i64), } struct Concatenation { From 2d8baa5750a397764ed1fef2ccae85584b62227a Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Wed, 29 Jan 2025 14:35:27 -0800 Subject: [PATCH 07/20] all tokens cooked --- compiler/qsc_qasm3/src/lex/cooked.rs | 302 +++++++++++++++++---- compiler/qsc_qasm3/src/lex/cooked/tests.rs | 2 - compiler/qsc_qasm3/src/lex/raw.rs | 161 +++++++++-- compiler/qsc_qasm3/src/lex/raw/tests.rs | 40 +-- compiler/qsc_qasm3/src/oqasm_ast.rs | 1 + 5 files changed, 407 insertions(+), 99 deletions(-) diff --git a/compiler/qsc_qasm3/src/lex/cooked.rs b/compiler/qsc_qasm3/src/lex/cooked.rs index ef6e485368..3da3efc8e6 100644 --- a/compiler/qsc_qasm3/src/lex/cooked.rs +++ b/compiler/qsc_qasm3/src/lex/cooked.rs @@ -25,6 +25,7 @@ use qsc_data_structures::span::Span; use std::{ fmt::{self, Display, Formatter}, iter::Peekable, + str::FromStr, }; use thiserror::Error; @@ -127,9 +128,55 @@ pub enum TokenKind { Identifier, HardwareQubit, +} - Whitespace, - Comment, +impl Display for TokenKind 
{ + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + TokenKind::Keyword(keyword) => write!(f, "keyword `{keyword}`"), + TokenKind::Type(type_) => write!(f, "keyword `{type_}`"), + TokenKind::GPhase => write!(f, "gphase"), + TokenKind::Inv => write!(f, "inv"), + TokenKind::Pow => write!(f, "pow"), + TokenKind::Ctrl => write!(f, "ctrl"), + TokenKind::NegCtrl => write!(f, "negctrl"), + TokenKind::Dim => write!(f, "dim"), + TokenKind::DurationOf => write!(f, "durationof"), + TokenKind::Delay => write!(f, "delay"), + TokenKind::Reset => write!(f, "reset"), + TokenKind::Measure => write!(f, "measure"), + TokenKind::Barrier => write!(f, "barrier"), + TokenKind::Literal(literal) => write!(f, "literal `{literal}`"), + TokenKind::Open(Delim::Brace) => write!(f, "`{{`"), + TokenKind::Open(Delim::Bracket) => write!(f, "`[`"), + TokenKind::Open(Delim::Paren) => write!(f, "`(`"), + TokenKind::Close(Delim::Brace) => write!(f, "`}}`"), + TokenKind::Close(Delim::Bracket) => write!(f, "`]`"), + TokenKind::Close(Delim::Paren) => write!(f, "`)`"), + TokenKind::Colon => write!(f, "`:`"), + TokenKind::Semicolon => write!(f, "`;`"), + TokenKind::Dot => write!(f, "`.`"), + TokenKind::Comma => write!(f, "`,`"), + TokenKind::PlusPlus => write!(f, "`++`"), + TokenKind::Arrow => write!(f, "`->`"), + TokenKind::UnaryOperator(op) => write!(f, "`{op}`"), + TokenKind::BinaryOperator(op) => write!(f, "`{op}`"), + TokenKind::BinaryOperatorEq(op) => write!(f, "`{op}=`"), + TokenKind::ComparisonOperator(op) => write!(f, "`{op}`"), + TokenKind::Eq => write!(f, "`=`"), + TokenKind::Identifier => write!(f, "identifier"), + TokenKind::HardwareQubit => write!(f, "hardware bit"), + } + } +} + +impl From for TokenKind { + fn from(value: Number) -> Self { + match value { + Number::Float => Self::Literal(Literal::Float), + Number::Int(radix) => Self::Literal(Literal::Integer(radix)), + } + } } #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] @@ -158,6 +205,61 @@ pub enum Type { Stretch, } +impl Display for Type { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.write_str(match self { + Type::Input => "input", + Type::Output => "output", + Type::Const => "const", + Type::Readonly => "readonly", + Type::Mutable => "mutable", + Type::QReg => "qreg", + Type::Qubit => "qubit", + Type::CReg => "creg", + Type::Bool => "bool", + Type::Bit => "bit", + Type::Int => "int", + Type::UInt => "uint", + Type::Float => "float", + Type::Angle => "angle", + Type::Complex => "complex", + Type::Array => "array", + Type::Void => "void", + Type::Duration => "duration", + Type::Stretch => "stretch", + }) + } +} + +impl FromStr for Type { + type Err = (); + + fn from_str(s: &str) -> Result { + match s { + "input" => Ok(Type::Input), + "output" => Ok(Type::Output), + "const" => Ok(Type::Const), + "readonly" => Ok(Type::Readonly), + "mutable" => Ok(Type::Mutable), + "qreg" => Ok(Type::QReg), + "qubit" => Ok(Type::Qubit), + "creg" => Ok(Type::CReg), + "bool" => Ok(Type::Bool), + "bit" => Ok(Type::Bit), + "int" => Ok(Type::Int), + "uint" => Ok(Type::UInt), + "float" => Ok(Type::Float), + "angle" => Ok(Type::Angle), + "complex" => Ok(Type::Complex), + "array" => Ok(Type::Array), + "void" => Ok(Type::Void), + "duration" => Ok(Type::Duration), + "stretch" => Ok(Type::Stretch), + _ => Err(()), + } + } +} + #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] pub enum Literal { Bitstring, @@ -166,7 +268,35 @@ pub enum Literal { Imaginary, Integer(Radix), String, - Timing, + Timing(TimingLiteralKind), +} + +impl Display for Literal 
{ + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.write_str(match self { + Literal::Bitstring => "bitstring", + Literal::Boolean => "boolean", + Literal::Float => "float", + Literal::Imaginary => "imaginary", + Literal::Integer(_) => "integer", + Literal::String => "string", + Literal::Timing(_) => "timing", + }) + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] +pub enum TimingLiteralKind { + /// Timing literal: TODO: what is this? + Dt, + /// Timing literal: Nanoseconds. + Ns, + /// Timing literal: Microseconds. + Us, + /// Timing literal: Milliseconds. + Ms, + /// Timing literal: Seconds. + S, } #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] @@ -179,6 +309,16 @@ pub enum UnaryOperator { Tilde, } +impl Display for UnaryOperator { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.write_str(match self { + UnaryOperator::Bang => "!", + UnaryOperator::Minus => "-", + UnaryOperator::Tilde => "~", + }) + } +} + /// A binary operator that returns the same type as the type of its first operand; in other words, /// the domain of the first operand is closed under this operation. These are candidates for /// compound assignment operators, like `+=`. @@ -210,6 +350,24 @@ pub enum ClosedBinaryOperator { // But this is this a bug in the official qasm lexer? } +impl Display for ClosedBinaryOperator { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + f.write_str(match self { + ClosedBinaryOperator::Amp => "&", + ClosedBinaryOperator::Bar => "|", + ClosedBinaryOperator::Caret => "^", + ClosedBinaryOperator::GtGt => ">>", + ClosedBinaryOperator::LtLt => "<<", + ClosedBinaryOperator::Minus => "-", + ClosedBinaryOperator::Percent => "%", + ClosedBinaryOperator::Plus => "+", + ClosedBinaryOperator::Slash => "/", + ClosedBinaryOperator::Star => "*", + ClosedBinaryOperator::StarStar => "**", + }) + } +} + #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] pub enum ComparisonOperator { /// `&&` @@ -230,24 +388,18 @@ pub enum ComparisonOperator { LtEq, } -impl Display for TokenKind { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - todo!() - } -} - -impl From for TokenKind { - fn from(value: Number) -> Self { - match value { - Number::Float => Self::Literal(Literal::Float), - Number::Int(radix) => Self::Literal(Literal::Integer(radix)), - } - } -} - -impl Display for ClosedBinaryOperator { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - todo!() +impl Display for ComparisonOperator { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.write_str(match self { + ComparisonOperator::AmpAmp => "&&", + ComparisonOperator::BangEq => "!=", + ComparisonOperator::BarBar => "||", + ComparisonOperator::EqEq => "==", + ComparisonOperator::Gt => ">", + ComparisonOperator::GtEq => ">=", + ComparisonOperator::Lt => "<", + ComparisonOperator::LtEq => "<=", + }) } } @@ -302,25 +454,86 @@ impl<'a> Lexer<'a> { } } + /// Returns the first token ahead of the cursor without consuming it. This operation is fast, + /// but if you know you want to consume the token if it matches, use [`next_if_eq`] instead. + fn first(&mut self) -> Option { + self.tokens.peek().map(|i| i.kind) + } + + /// Returns the second token ahead of the cursor without consuming it. This is slower + /// than [`first`] and should be avoided when possible. + fn second(&self) -> Option { + let mut tokens = self.tokens.clone(); + tokens.next(); + tokens.next().map(|i| i.kind) + } + + /// Consumes a list of tokens zero or more times. 
+ fn kleen_star(&mut self, tokens: &[raw::TokenKind], complete: TokenKind) -> Result<(), Error> { + let mut iter = tokens.iter(); + while self.next_if_eq(*(iter.next().expect("tokens should have at least one token"))) { + for token in iter { + self.expect(*token, complete)? + } + iter = tokens.iter(); + } + Ok(()) + } + fn cook(&mut self, token: &raw::Token) -> Result, Error> { let kind = match token.kind { - raw::TokenKind::Comment(raw::CommentKind::Block | raw::CommentKind::Normal) - | raw::TokenKind::Whitespace => Ok(None), + raw::TokenKind::Bitstring { terminated: true } => { + Ok(Some(TokenKind::Literal(Literal::Bitstring))) + } + raw::TokenKind::Bitstring { terminated: false } => { + Err(Error::UnterminatedString(Span { + lo: token.offset, + hi: token.offset, + })) + } + raw::TokenKind::Comment(_) | raw::TokenKind::Newline | raw::TokenKind::Whitespace => { + Ok(None) + } raw::TokenKind::Ident => { let ident = &self.input[(token.offset as usize)..(self.offset() as usize)]; Ok(Some(self.ident(ident))) } - raw::TokenKind::Number(number) => Ok(Some(number.into())), + raw::TokenKind::HardwareQubit => Ok(Some(TokenKind::HardwareQubit)), + raw::TokenKind::LiteralFragment(_) => { + // if a literal fragment does not appear after a decimal + // or a float, treat it as an identifier. + Ok(Some(TokenKind::Identifier)) + } + raw::TokenKind::Number(number) => { + // after reading a decimal number or a float there could be a whitespace + // followed by a fragment, which will change the type of the literal. + if let ( + Some(raw::TokenKind::Whitespace), + Some(raw::TokenKind::LiteralFragment(fragment)), + ) = (self.first(), self.second()) + { + use self::Literal::{Imaginary, Timing}; + use TokenKind::Literal; + Ok(Some(match fragment { + raw::LiteralFragmentKind::Imag => Literal(Imaginary), + raw::LiteralFragmentKind::Dt => Literal(Timing(TimingLiteralKind::Dt)), + raw::LiteralFragmentKind::Ns => Literal(Timing(TimingLiteralKind::Ns)), + raw::LiteralFragmentKind::Us => Literal(Timing(TimingLiteralKind::Us)), + raw::LiteralFragmentKind::Ms => Literal(Timing(TimingLiteralKind::Ms)), + raw::LiteralFragmentKind::S => Literal(Timing(TimingLiteralKind::S)), + })) + } else { + Ok(Some(number.into())) + } + } raw::TokenKind::Single(single) => self.single(single).map(Some), - raw::TokenKind::String(raw::StringToken { terminated: true }) => { + raw::TokenKind::String { terminated: true } => { Ok(Some(TokenKind::Literal(Literal::String))) } - raw::TokenKind::String(raw::StringToken { terminated: false }) => { - Err(Error::UnterminatedString(Span { - lo: token.offset, - hi: token.offset, - })) - } + raw::TokenKind::String { terminated: false } => Err(Error::UnterminatedString(Span { + lo: token.offset, + hi: token.offset, + })), raw::TokenKind::Unknown => { let c = self.input[(token.offset as usize)..] .chars() @@ -343,18 +556,6 @@ impl<'a> Lexer<'a> { })) } - /// Consumes a list of tokens zero or more times. - fn kleen_star(&mut self, tokens: &[raw::TokenKind], complete: TokenKind) -> Result<(), Error> { - let mut iter = tokens.iter(); - while self.next_if_eq(*(iter.next().expect("tokens should have at least one token"))) { - for token in iter { - self.expect(*token, complete)? 
- } - iter = tokens.iter(); - } - Ok(()) - } - #[allow(clippy::too_many_lines)] fn single(&mut self, single: Single) -> Result { match single { @@ -372,7 +573,7 @@ impl<'a> Lexer<'a> { &[raw::TokenKind::Single(Single::Dot), raw::TokenKind::Ident], complete, )?; - Ok(TokenKind::Keyword(Keyword::Annotation)) + Ok(complete) } Single::Bang => { if self.next_if_eq_single(Single::Eq) { @@ -469,10 +670,15 @@ impl<'a> Lexer<'a> { "measure" => TokenKind::Measure, "barrier" => TokenKind::Barrier, "false" | "true" => TokenKind::Literal(Literal::Boolean), - ident => ident - .parse() - .map(TokenKind::Keyword) - .unwrap_or(TokenKind::Identifier), + ident => { + if let Ok(keyword) = ident.parse::() { + TokenKind::Keyword(keyword) + } else if let Ok(type_) = ident.parse::() { + TokenKind::Type(type_) + } else { + TokenKind::Identifier + } + } } } } diff --git a/compiler/qsc_qasm3/src/lex/cooked/tests.rs b/compiler/qsc_qasm3/src/lex/cooked/tests.rs index 1bebf33bf7..b5297ef55c 100644 --- a/compiler/qsc_qasm3/src/lex/cooked/tests.rs +++ b/compiler/qsc_qasm3/src/lex/cooked/tests.rs @@ -46,8 +46,6 @@ fn op_string(kind: TokenKind) -> Option { TokenKind::ComparisonOperator(comparison_operator) => todo!(), TokenKind::Identifier => todo!(), TokenKind::HardwareQubit => todo!(), - TokenKind::Whitespace => todo!(), - TokenKind::Comment => todo!(), } } diff --git a/compiler/qsc_qasm3/src/lex/raw.rs b/compiler/qsc_qasm3/src/lex/raw.rs index c983536ac7..8554e5107d 100644 --- a/compiler/qsc_qasm3/src/lex/raw.rs +++ b/compiler/qsc_qasm3/src/lex/raw.rs @@ -34,11 +34,15 @@ pub struct Token { #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] pub enum TokenKind { + Bitstring { terminated: bool }, Comment(CommentKind), + HardwareQubit, Ident, + LiteralFragment(LiteralFragmentKind), + Newline, Number(Number), Single(Single), - String(StringToken), + String { terminated: bool }, Unknown, Whitespace, } @@ -46,13 +50,17 @@ pub enum TokenKind { impl Display for TokenKind { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { + TokenKind::Bitstring { .. } => f.write_str("bitstring"), TokenKind::Comment(CommentKind::Block) => f.write_str("block comment"), TokenKind::Comment(CommentKind::Normal) => f.write_str("comment"), + TokenKind::HardwareQubit => f.write_str("hardware qubit"), TokenKind::Ident => f.write_str("identifier"), + TokenKind::LiteralFragment(_) => f.write_str("literal fragment"), + TokenKind::Newline => f.write_str("newline"), TokenKind::Number(Number::Float) => f.write_str("float"), TokenKind::Number(Number::Int(_)) => f.write_str("integer"), TokenKind::Single(single) => write!(f, "`{single}`"), - TokenKind::String(_) => f.write_str("string"), + TokenKind::String { .. } => f.write_str("string"), TokenKind::Unknown => f.write_str("unknown"), TokenKind::Whitespace => f.write_str("whitespace"), } @@ -152,6 +160,22 @@ pub enum CommentKind { Normal, } +#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] +pub enum LiteralFragmentKind { + /// Imaginary literal fragment. + Imag, + /// Timing literal: TODO: what is this? + Dt, + /// Timing literal: Nanoseconds. + Ns, + /// Timing literal: Microseconds. + Us, + /// Timing literal: Milliseconds. + Ms, + /// Timing literal: Seconds. + S, +} + #[derive(Clone)] pub struct Lexer<'a> { chars: Peekable>, @@ -179,8 +203,16 @@ impl<'a> Lexer<'a> { self.chars.next_if(|i| i.1 == c).is_some() } - fn eat_while(&mut self, mut f: impl FnMut(char) -> bool) { - while self.chars.next_if(|i| f(i.1)).is_some() {} + /// Consumes the characters while they satisfy `f`. 
Returns the last character eaten, if any. + fn eat_while(&mut self, mut f: impl FnMut(char) -> bool) -> Option { + let mut last_eaten = None; + loop { + let c = self.chars.next_if(|i| f(i.1)); + if c.is_none() { + return last_eaten.map(|(_, c)| c); + } + last_eaten = c; + } } /// Returns the first character ahead of the cursor without consuming it. This operation is fast, @@ -197,9 +229,18 @@ impl<'a> Lexer<'a> { chars.next().map(|i| i.1) } + fn newline(&mut self, c: char) -> bool { + if is_newline(c) { + self.eat_while(is_newline); + true + } else { + false + } + } + fn whitespace(&mut self, c: char) -> bool { - if c.is_whitespace() { - self.eat_while(char::is_whitespace); + if is_whitespace(c) { + self.eat_while(is_whitespace); true } else { false @@ -208,9 +249,8 @@ impl<'a> Lexer<'a> { fn comment(&mut self, c: char) -> Option { if c == '/' && self.next_if_eq('/') { - let kind = CommentKind::Normal; - self.eat_while(|c| c != '\n' && c != '\r'); - Some(kind) + self.eat_while(|c| !is_newline(c)); + Some(CommentKind::Normal) } else if c == '/' && self.next_if_eq('*') { loop { let (_, c) = self.chars.next()?; @@ -223,12 +263,38 @@ impl<'a> Lexer<'a> { } } - fn ident(&mut self, c: char) -> bool { + fn ident(&mut self, c: char) -> Option { + let first = self.first(); + let second = self.second(); + + // Check for some special literal fragments. + // fragment TimeUnit: 'dt' | 'ns' | 'us' | 'µs' | 'ms' | 's'; + if c == 's' && first.is_some_and(|c1| c1 != '_' && !char::is_alphanumeric(c1)) { + return Some(TokenKind::LiteralFragment(LiteralFragmentKind::S)); + } + + if let (Some(c1), Some(c2)) = (first, second) { + if c2 != '_' || !c2.is_alphanumeric() { + match (c, c1) { + ('i', 'm') => { + return Some(TokenKind::LiteralFragment(LiteralFragmentKind::Imag)) + } + ('d', 't') => return Some(TokenKind::LiteralFragment(LiteralFragmentKind::Dt)), + ('n', 's') => return Some(TokenKind::LiteralFragment(LiteralFragmentKind::Ns)), + ('u' | 'µ', 's') => { + return Some(TokenKind::LiteralFragment(LiteralFragmentKind::Us)) + } + ('m', 's') => return Some(TokenKind::LiteralFragment(LiteralFragmentKind::Ms)), + _ => (), + } + } + } + if c == '_' || c.is_alphabetic() { self.eat_while(|c| c == '_' || c.is_alphanumeric()); - true + Some(TokenKind::Ident) } else { - false + None } } @@ -275,7 +341,7 @@ impl<'a> Lexer<'a> { fn float(&mut self) -> bool { // Watch out for ranges: `0..` should be an integer followed by two dots. - if self.first() == Some('.') && self.second() != Some('.') { + if self.first() == Some('.') { self.chars.next(); self.eat_while(|c| c == '_' || c.is_ascii_digit()); self.exp(); @@ -295,21 +361,56 @@ impl<'a> Lexer<'a> { } } - fn string(&mut self, c: char) -> Option { - if c != '"' { + fn string(&mut self, string_start: char) -> Option { + if string_start != '"' && string_start != '\'' { return None; } - while self.first().is_some_and(|c| c != '"') { - self.eat_while(|c| c != '\\' && c != '"'); + if let Some(bitstring) = self.bitstring() { + // consume the closing '"' + self.next(); + return Some(bitstring); + } + + while self.first().is_some_and(|c| c != string_start) { + self.eat_while(|c| c != '\\' && c != string_start); if self.next_if_eq('\\') { self.chars.next(); } } - Some(TokenKind::String(StringToken { - terminated: self.next_if_eq('"'), - })) + Some(TokenKind::String { + terminated: self.next_if_eq(string_start), + }) + } + + fn bitstring(&mut self) -> Option { + const STRING_START: char = '"'; + + // A bitstring must have at least one character. 
+ if matches!(self.first(), None | Some(STRING_START)) { + return None; + } + + // A bitstring must end in a 0 or a 1. + if let Some('_') = self.eat_while(is_bitstring_char) { + return None; + } + + match self.first() { + None => Some(TokenKind::Bitstring { terminated: false }), + Some(STRING_START) => Some(TokenKind::Bitstring { terminated: true }), + _ => None, + } + } + + fn hardware_qubit(&mut self, c: char) -> bool { + if c == '$' { + self.eat_while(|c| c.is_ascii_digit()); + true + } else { + false + } } } @@ -322,8 +423,12 @@ impl Iterator for Lexer<'_> { TokenKind::Comment(kind) } else if self.whitespace(c) { TokenKind::Whitespace - } else if self.ident(c) { - TokenKind::Ident + } else if self.newline(c) { + TokenKind::Newline + } else if let Some(ident) = self.ident(c) { + ident + } else if self.hardware_qubit(c) { + TokenKind::HardwareQubit } else { self.number(c) .map(TokenKind::Number) @@ -368,3 +473,15 @@ fn single(c: char) -> Option { _ => None, } } + +fn is_bitstring_char(c: char) -> bool { + c == '0' || c == '1' || c == '_' +} + +fn is_newline(c: char) -> bool { + c == '\n' || c == '\r' +} + +fn is_whitespace(c: char) -> bool { + !is_newline(c) && c.is_whitespace() +} diff --git a/compiler/qsc_qasm3/src/lex/raw/tests.rs b/compiler/qsc_qasm3/src/lex/raw/tests.rs index cf24c86747..fcb79039d2 100644 --- a/compiler/qsc_qasm3/src/lex/raw/tests.rs +++ b/compiler/qsc_qasm3/src/lex/raw/tests.rs @@ -105,7 +105,7 @@ fn comment() { offset: 0, }, Token { - kind: Whitespace, + kind: Newline, offset: 9, }, Token { @@ -147,7 +147,7 @@ fn comment_four_slashes() { offset: 0, }, Token { - kind: Whitespace, + kind: Newline, offset: 11, }, Token { @@ -166,11 +166,9 @@ fn string() { &expect![[r#" [ Token { - kind: String( - StringToken { - terminated: true, - }, - ), + kind: String { + terminated: true, + }, offset: 0, }, ] @@ -185,11 +183,9 @@ fn string_escape_quote() { &expect![[r#" [ Token { - kind: String( - StringToken { - terminated: true, - }, - ), + kind: String { + terminated: true, + }, offset: 0, }, ] @@ -204,11 +200,9 @@ fn string_missing_ending() { &expect![[r#" [ Token { - kind: String( - StringToken { - terminated: false, - }, - ), + kind: String { + terminated: false, + }, offset: 0, }, ] @@ -338,25 +332,17 @@ fn number_underscore_prefix() { } #[test] -fn int_dot_dot() { +fn float_dot() { check( "0..", &expect![[r#" [ Token { kind: Number( - Int( - Decimal, - ), + Float, ), offset: 0, }, - Token { - kind: Single( - Dot, - ), - offset: 1, - }, Token { kind: Single( Dot, diff --git a/compiler/qsc_qasm3/src/oqasm_ast.rs b/compiler/qsc_qasm3/src/oqasm_ast.rs index e86ee1e170..dad18cf8e4 100644 --- a/compiler/qsc_qasm3/src/oqasm_ast.rs +++ b/compiler/qsc_qasm3/src/oqasm_ast.rs @@ -517,6 +517,7 @@ enum IOKeyword { } enum TimeUnit { + /// TODO: what is this? Dt, /// Nanoseconds. 
Ns, From 1248ed8f6a150c47b0125971fa8c2d96fbc66b53 Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Wed, 29 Jan 2025 15:04:01 -0800 Subject: [PATCH 08/20] add leading dot floats --- compiler/qsc_qasm3/src/lex/raw.rs | 13 ++++++ compiler/qsc_qasm3/src/lex/raw/tests.rs | 60 +++++-------------------- 2 files changed, 24 insertions(+), 49 deletions(-) diff --git a/compiler/qsc_qasm3/src/lex/raw.rs b/compiler/qsc_qasm3/src/lex/raw.rs index 8554e5107d..4be475c06b 100644 --- a/compiler/qsc_qasm3/src/lex/raw.rs +++ b/compiler/qsc_qasm3/src/lex/raw.rs @@ -302,6 +302,17 @@ impl<'a> Lexer<'a> { self.leading_zero(c).or_else(|| self.decimal(c)) } + fn leading_dot(&mut self, c: char) -> bool { + if c == '.' && self.first().is_some_and(|c| char::is_ascii_digit(&c)) { + self.next(); + self.eat_while(|c| c == '_' || c.is_ascii_digit()); + self.exp(); + true + } else { + false + } + } + fn leading_zero(&mut self, c: char) -> Option { if c != '0' { return None; @@ -429,6 +440,8 @@ impl Iterator for Lexer<'_> { ident } else if self.hardware_qubit(c) { TokenKind::HardwareQubit + } else if self.leading_dot(c) { + TokenKind::Number(Number::Float) } else { self.number(c) .map(TokenKind::Number) diff --git a/compiler/qsc_qasm3/src/lex/raw/tests.rs b/compiler/qsc_qasm3/src/lex/raw/tests.rs index fcb79039d2..f902969599 100644 --- a/compiler/qsc_qasm3/src/lex/raw/tests.rs +++ b/compiler/qsc_qasm3/src/lex/raw/tests.rs @@ -355,7 +355,7 @@ fn float_dot() { } #[test] -fn dot_dot_int() { +fn dot_float() { check( "..0", &expect![[r#" @@ -366,19 +366,11 @@ fn dot_dot_int() { ), offset: 0, }, - Token { - kind: Single( - Dot, - ), - offset: 1, - }, Token { kind: Number( - Int( - Decimal, - ), + Float, ), - offset: 2, + offset: 1, }, ] "#]], @@ -386,7 +378,7 @@ fn dot_dot_int() { } #[test] -fn dot_dot_dot_int() { +fn dot_dot_float() { check( "...0", &expect![[r#" @@ -403,19 +395,11 @@ fn dot_dot_dot_int() { ), offset: 1, }, - Token { - kind: Single( - Dot, - ), - offset: 2, - }, Token { kind: Number( - Int( - Decimal, - ), + Float, ), - offset: 3, + offset: 2, }, ] "#]], @@ -548,19 +532,11 @@ fn leading_point() { ".123", &expect![[r#" [ - Token { - kind: Single( - Dot, - ), - offset: 0, - }, Token { kind: Number( - Int( - Decimal, - ), + Float, ), - offset: 1, + offset: 0, }, ] "#]], @@ -654,17 +630,11 @@ fn leading_point_exp() { ".25e2", &expect![[r#" [ - Token { - kind: Single( - Dot, - ), - offset: 0, - }, Token { kind: Number( Float, ), - offset: 1, + offset: 0, }, ] "#]], @@ -755,19 +725,11 @@ fn float_hexadecimal() { ), offset: 0, }, - Token { - kind: Single( - Dot, - ), - offset: 5, - }, Token { kind: Number( - Int( - Decimal, - ), + Float, ), - offset: 6, + offset: 5, }, ] "#]], From 509da8977e77fdce3c6ea2de851bc06cb61b0fc4 Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Wed, 29 Jan 2025 15:14:30 -0800 Subject: [PATCH 09/20] remove AST --- compiler/qsc_qasm3/src/lib.rs | 1 - compiler/qsc_qasm3/src/oqasm_ast.rs | 530 ---------------------------- 2 files changed, 531 deletions(-) delete mode 100644 compiler/qsc_qasm3/src/oqasm_ast.rs diff --git a/compiler/qsc_qasm3/src/lib.rs b/compiler/qsc_qasm3/src/lib.rs index 7be343e15b..49b7d8f053 100644 --- a/compiler/qsc_qasm3/src/lib.rs +++ b/compiler/qsc_qasm3/src/lib.rs @@ -8,7 +8,6 @@ pub use compile::qasm_to_program; pub mod io; mod keyword; mod lex; -mod oqasm_ast; mod oqasm_helpers; mod oqasm_types; pub mod parse; diff --git a/compiler/qsc_qasm3/src/oqasm_ast.rs 
b/compiler/qsc_qasm3/src/oqasm_ast.rs deleted file mode 100644 index dad18cf8e4..0000000000 --- a/compiler/qsc_qasm3/src/oqasm_ast.rs +++ /dev/null @@ -1,530 +0,0 @@ -use qsc::Span; -use std::convert::Infallible; - -// TODO: profile this with iai-callgrind in a large OpenQASM3 -// sample to verify that is actually faster than using Vec. -/// An alternative to `Vec` that uses less stack space. -type List = Box<[Box]>; - -enum Union { - First(T1), - Second(T2), - Third(T3), -} - -enum QASMNode { - Program(Program), - Annotation(Annotation), - Stmt(Stmt), - Expr(Expr), - DiscreteSet(DiscreteSet), - RangeDefinition(RangeDefinition), - IndexedIdentifier(IndexedIdentifier), - QuantumGateModifier(QuantumGateModifier), - QuantumMeasurement(QuantumMeasurement), - ClassicalArgument(ClassicalArgument), - ExternArgument(ExternArgument), - ClassicalType(ClassicalType), - QuantumArgument(QuantumArgument), - Pragma(Pragma), -} - -struct Program { - span: Span, - statements: List>, - version: Option, -} - -struct Annotation { - span: Span, - keyword: String, - command: Option, -} - -struct Stmt { - span: Span, - annotations: List, - kind: Box, -} - -struct Expr { - span: Span, - kind: Box, -} - -struct DiscreteSet { - span: Span, - values: List, -} - -struct RangeDefinition { - span: Span, - start: Option, - end: Option, - step: Option, -} - -struct Identifier { - span: Span, - name: String, -} - -struct IndexedIdentifier { - span: Span, - name: Identifier, - indices: List, -} - -struct QuantumGateModifier { - span: Span, - qubit: Union, -} - -struct QuantumMeasurement { - span: Span, - qubit: Union, -} - -struct ClassicalArgument { - span: Span, - r#type: ClassicalType, - name: Identifier, - access: Option, -} - -struct ExternArgument { - span: Span, - r#type: ClassicalType, - access: Option, -} - -struct ClassicalType { - span: Span, - kind: ClassicalTypeKind, -} - -enum ClassicalTypeKind { - Int(IntType), - UInt(UIntType), - Float(FloatType), - Complex(ComplexType), - Angle(AngleType), - Bit(BitType), - BoolType, - Array(ArrayType), - ArrayReference(ArrayReferenceType), - Duration, - Stretch, -} - -enum ArrayBaseTypeKind { - Int(IntType), - UInt(UIntType), - Float(FloatType), - Complex(ComplexType), - Angle(AngleType), - Bit(BitType), - BoolType, -} - -struct IntType { - size: Option, -} - -struct UIntType { - size: Option, -} - -struct FloatType { - size: Option, -} - -struct ComplexType { - base_size: Option, -} - -struct AngleType { - size: Option, -} - -struct BitType { - size: Option, -} - -struct ArrayType { - span: Span, - base_type: ArrayBaseTypeKind, - dimensions: List, -} - -struct ArrayReferenceType { - span: Span, - base_type: ArrayBaseTypeKind, - dimensions: Union>, -} - -enum AccessControl { - ReadOnly, - Mutable, -} - -struct QuantumArgument { - span: Span, - size: Option, -} - -struct Pragma { - span: Span, - command: String, -} - -enum StmtKind { - CompoundStmt(CompoundStmt), - Include(IncludeStmt), - ExpressionStmt(Expr), - QubitDeclaration(QubitDeclaration), - QuantumGateDefinition(QuantumGateDefinition), - ExternDeclaration(ExternDeclaration), - Quantum(QuantumStmt), - Measurement(QuantumMeasurementStmt), - ClassicalDeclaration(ClassicalDeclaration), - IODeclaration(IODeclaration), - ConstantDeclaration(ConstantDeclaration), - CalibrationGrammarDeclaration(CalibrationGrammarDeclaration), - CalibrationStmt(CalibrationStmt), - CalibrationDefinition(CalibrationDefinition), - SubroutineDefinition(SubroutineDefinition), - Return(ReturnStmt), - Break, - Continue, - 
Branching(BranchingStmt), - WhileLoop(WhileLoop), - ForInLoop(ForInLoop), - Switch(SwitchStmt), - ClassicalAssignment(ClassicalAssignment), -} - -struct CompoundStmt { - span: Span, - statements: List, -} - -struct IncludeStmt { - span: Span, - filename: String, -} - -struct QubitDeclaration { - span: Span, - qubit: Identifier, - size: Option, -} - -struct QuantumGateDefinition { - span: Span, - name: Identifier, - arguments: Vec, - qubits: Vec, - body: Vec, -} - -struct ExternDeclaration { - span: Span, - name: Identifier, - arguments: List, - return_type: Option, -} - -struct QuantumStmt { - span: Span, - kind: QuantumStmtKind, -} - -enum QuantumStmtKind { - Gate(QuantumGate), - Phase(QuantumPhase), - Barrier(List), - Reset(List>), - DelayInstruction(DelayInstruction), - Box(BoxStmt), -} - -struct QuantumGate { - span: Span, - modifiers: List, - name: Identifier, - args: List, - qubits: List>, - duration: Option, -} - -struct QuantumPhase { - span: Span, - modifiers: List, - arg: Expr, - qubits: List>, -} - -struct DelayInstruction { - span: Span, - duration: Expr, - qubits: List>, -} - -struct BoxStmt { - span: Span, - duration: Option, - body: List, -} - -struct QuantumMeasurementStmt { - span: Span, - measure: QuantumMeasurement, - target: Option>>, -} - -struct ClassicalDeclaration { - span: Span, - r#type: ClassicalType, - identifier: Identifier, - init_expr: Option>, -} - -struct IODeclaration { - span: Span, - io_identifier: IOKeyword, - r#type: ClassicalType, - identifier: Identifier, -} - -struct ConstantDeclaration { - span: Span, - r#type: ClassicalType, - identifier: Identifier, - init_expr: Expr, -} - -struct CalibrationGrammarDeclaration { - span: Span, - name: String, -} - -struct CalibrationStmt { - span: Span, - body: String, -} - -struct CalibrationDefinition { - span: Span, - name: Identifier, - args: List>, - qubits: List, - return_type: Option, - body: String, -} - -struct SubroutineDefinition { - span: Span, - name: Identifier, - args: List>, - body: List, - return_type: Option, -} - -struct ReturnStmt { - span: Span, - expr: Option>, -} - -struct BranchingStmt { - span: Span, - condition: Expr, - if_block: List, - else_block: List, -} - -struct WhileLoop { - span: Span, - while_condition: Expr, - block: List, -} - -struct ForInLoop { - span: Span, - r#type: ClassicalType, - identifier: Identifier, - set_declaration: Union, - block: List, -} - -struct SwitchStmt { - span: Span, - target: Expr, - cases: List<(List, CompoundStmt)>, - /// Note that `None` is quite different to `[]` in this case; the latter is - /// an explicitly empty body, whereas the absence of a default might mean - /// that the switch is inexhaustive, and a linter might want to complain. 
- default: Option, -} - -struct ClassicalAssignment { - span: Span, - lvalue: Union, - op: AssignmentOp, -} - -enum ExprKind { - Identifier(Identifier), - UnaryExpr(UnaryExpr), - BinaryExpr(BinaryExpr), - Literal(Literal), - FunctionCall(FunctionCall), - Cast(Cast), - Concatenation(Concatenation), - IndexExpr(IndexExpr), - DurationOf(DurationOf), - SizeOf(SizeOf), -} - -struct UnaryExpr { - span: Span, - op: UnaryOp, - expr: Expr, -} - -struct BinaryExpr { - span: Span, - op: BinaryOp, - lhs: Expr, - rhs: Expr, -} -struct FunctionCall { - span: Span, - name: Identifier, - args: List, -} - -struct Cast { - span: Span, - r#type: ClassicalType, - arg: Expr, -} - -struct IndexExpr { - span: Span, - collection: Expr, - index: IndexElement, -} - -struct DurationOf { - span: Span, - target: List, -} - -struct SizeOf { - span: Span, - target: Expr, - value: Union, -} - -enum UnaryOp { - NegB, - NegL, - NegN, -} - -enum BinaryOp { - /// `>` - Gt, - /// `<` - Lt, - /// `>=` - Gte, - /// `<=` - Lte, - /// `==` - Eq, - /// `!=` - Neq, - /// `&&` - AndL, - /// `||` - OrL, - /// `|` - OrB, - /// `^` - XorB, - /// `&` - AndB, - /// `<<` - ShL, - /// `>>` - ShR, - /// `+` - Add, - /// `-` - Sub, - /// `*` - Mul, - /// `/` - Div, - /// `%` - Mod, - /// `**` - Exp, -} - -struct Literal { - span: Span, - kind: LiteralKind, -} - -enum LiteralKind { - Array(List), - Bitstring { value: usize, width: u32 }, - Boolean(bool), - Duration { value: f64, unit: TimeUnit }, - Float(f64), - Imaginary(f64), - Integer(i64), -} - -struct Concatenation { - lhs: Expr, - rhs: Expr, -} - -type IndexElement = Union>>; - -enum AssignmentOp { - BinaryOp(BinaryOp), - /// OpenQASM3 has the `~=` assignment operator. - /// This enum variant is meant to capture that. - UnaryOp(UnaryOp), - Assign, -} - -enum GateModifierName { - Inv, - Pow, - Ctrl, - NegCtrl, -} - -enum IOKeyword { - Input, - Output, -} - -enum TimeUnit { - /// TODO: what is this? - Dt, - /// Nanoseconds. - Ns, - /// Microseconds. - Us, - /// Milliseconds. - Ms, - /// Seconds. 
- S, -} From 7787a272cb34bab04459ce5c15e494cddeadd296 Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Thu, 30 Jan 2025 08:45:00 -0800 Subject: [PATCH 10/20] Change error codes to Qasm3.Lex --- compiler/qsc_qasm3/src/lex/cooked.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/compiler/qsc_qasm3/src/lex/cooked.rs b/compiler/qsc_qasm3/src/lex/cooked.rs index 3da3efc8e6..4b0459f2a4 100644 --- a/compiler/qsc_qasm3/src/lex/cooked.rs +++ b/compiler/qsc_qasm3/src/lex/cooked.rs @@ -38,19 +38,19 @@ pub(crate) struct Token { #[derive(Clone, Copy, Debug, Diagnostic, Eq, Error, PartialEq)] pub enum Error { #[error("expected {0} to complete {1}, found {2}")] - #[diagnostic(code("Qsc.Lex.Incomplete"))] + #[diagnostic(code("Qasm3.Lex.Incomplete"))] Incomplete(raw::TokenKind, TokenKind, raw::TokenKind, #[label] Span), #[error("expected {0} to complete {1}, found EOF")] - #[diagnostic(code("Qsc.Lex.IncompleteEof"))] + #[diagnostic(code("Qasm3.Lex.IncompleteEof"))] IncompleteEof(raw::TokenKind, TokenKind, #[label] Span), #[error("unterminated string literal")] - #[diagnostic(code("Qsc.Lex.UnterminatedString"))] + #[diagnostic(code("Qasm3.Lex.UnterminatedString"))] UnterminatedString(#[label] Span), #[error("unrecognized character `{0}`")] - #[diagnostic(code("Qsc.Lex.UnknownChar"))] + #[diagnostic(code("Qasm3.Lex.UnknownChar"))] Unknown(char, #[label] Span), } From 8550ccfc7acea42af6a95e83f4bcfba1f75a1a20 Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Thu, 30 Jan 2025 09:53:01 -0800 Subject: [PATCH 11/20] add unit tests for literal fragments --- compiler/qsc_qasm3/src/lex/raw.rs | 33 +++--- compiler/qsc_qasm3/src/lex/raw/tests.rs | 140 ++++++++++++++++++++++++ 2 files changed, 158 insertions(+), 15 deletions(-) diff --git a/compiler/qsc_qasm3/src/lex/raw.rs b/compiler/qsc_qasm3/src/lex/raw.rs index 4be475c06b..6be2cbda69 100644 --- a/compiler/qsc_qasm3/src/lex/raw.rs +++ b/compiler/qsc_qasm3/src/lex/raw.rs @@ -268,24 +268,27 @@ impl<'a> Lexer<'a> { let second = self.second(); // Check for some special literal fragments. 
- // fragment TimeUnit: 'dt' | 'ns' | 'us' | 'µs' | 'ms' | 's'; - if c == 's' && first.is_some_and(|c1| c1 != '_' && !char::is_alphanumeric(c1)) { + if c == 's' + && (first.is_none() || first.is_some_and(|c1| c1 != '_' && !c1.is_alphanumeric())) + { return Some(TokenKind::LiteralFragment(LiteralFragmentKind::S)); } - if let (Some(c1), Some(c2)) = (first, second) { - if c2 != '_' || !c2.is_alphanumeric() { - match (c, c1) { - ('i', 'm') => { - return Some(TokenKind::LiteralFragment(LiteralFragmentKind::Imag)) - } - ('d', 't') => return Some(TokenKind::LiteralFragment(LiteralFragmentKind::Dt)), - ('n', 's') => return Some(TokenKind::LiteralFragment(LiteralFragmentKind::Ns)), - ('u' | 'µ', 's') => { - return Some(TokenKind::LiteralFragment(LiteralFragmentKind::Us)) - } - ('m', 's') => return Some(TokenKind::LiteralFragment(LiteralFragmentKind::Ms)), - _ => (), + if let Some(c1) = first { + if second.is_none() || second.is_some_and(|c1| c1 != '_' && !c1.is_alphanumeric()) { + let fragment = match (c, c1) { + ('i', 'm') => Some(TokenKind::LiteralFragment(LiteralFragmentKind::Imag)), + ('d', 't') => Some(TokenKind::LiteralFragment(LiteralFragmentKind::Dt)), + ('n', 's') => Some(TokenKind::LiteralFragment(LiteralFragmentKind::Ns)), + ('u' | 'µ', 's') => Some(TokenKind::LiteralFragment(LiteralFragmentKind::Us)), + ('m', 's') => Some(TokenKind::LiteralFragment(LiteralFragmentKind::Ms)), + _ => None, + }; + + if fragment.is_some() { + // consume `first` before returning. + self.next(); + return fragment; } } } diff --git a/compiler/qsc_qasm3/src/lex/raw/tests.rs b/compiler/qsc_qasm3/src/lex/raw/tests.rs index f902969599..4b2930a14c 100644 --- a/compiler/qsc_qasm3/src/lex/raw/tests.rs +++ b/compiler/qsc_qasm3/src/lex/raw/tests.rs @@ -735,3 +735,143 @@ fn float_hexadecimal() { "#]], ); } + +#[test] +fn fragments() { + check( + "im dt ns us µs ms s", + &expect![[r#" + [ + Token { + kind: LiteralFragment( + Imag, + ), + offset: 0, + }, + Token { + kind: Whitespace, + offset: 2, + }, + Token { + kind: LiteralFragment( + Dt, + ), + offset: 3, + }, + Token { + kind: Whitespace, + offset: 5, + }, + Token { + kind: LiteralFragment( + Ns, + ), + offset: 6, + }, + Token { + kind: Whitespace, + offset: 8, + }, + Token { + kind: LiteralFragment( + Us, + ), + offset: 9, + }, + Token { + kind: Whitespace, + offset: 11, + }, + Token { + kind: LiteralFragment( + Us, + ), + offset: 12, + }, + Token { + kind: Whitespace, + offset: 15, + }, + Token { + kind: LiteralFragment( + Ms, + ), + offset: 16, + }, + Token { + kind: Whitespace, + offset: 18, + }, + Token { + kind: LiteralFragment( + S, + ), + offset: 19, + }, + ] + "#]], + ); +} + +#[test] +fn identifiers_with_fragment_prefixes() { + check( + "imx dtx nsx usx µsx msx sx", + &expect![[r#" + [ + Token { + kind: Ident, + offset: 0, + }, + Token { + kind: Whitespace, + offset: 3, + }, + Token { + kind: Ident, + offset: 4, + }, + Token { + kind: Whitespace, + offset: 7, + }, + Token { + kind: Ident, + offset: 8, + }, + Token { + kind: Whitespace, + offset: 11, + }, + Token { + kind: Ident, + offset: 12, + }, + Token { + kind: Whitespace, + offset: 15, + }, + Token { + kind: Ident, + offset: 16, + }, + Token { + kind: Whitespace, + offset: 20, + }, + Token { + kind: Ident, + offset: 21, + }, + Token { + kind: Whitespace, + offset: 24, + }, + Token { + kind: Ident, + offset: 25, + }, + ] + "#]], + ); +} From 155e7aa17a81686404f1b8218d95d33a341f620a Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Thu, 30 Jan 2025 
10:01:13 -0800 Subject: [PATCH 12/20] solve TODO notes --- compiler/qsc_qasm3/src/lex/cooked.rs | 5 +++-- compiler/qsc_qasm3/src/lex/raw.rs | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/compiler/qsc_qasm3/src/lex/cooked.rs b/compiler/qsc_qasm3/src/lex/cooked.rs index 4b0459f2a4..962b9d7e01 100644 --- a/compiler/qsc_qasm3/src/lex/cooked.rs +++ b/compiler/qsc_qasm3/src/lex/cooked.rs @@ -287,7 +287,8 @@ impl Display for Literal { #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] pub enum TimingLiteralKind { - /// Timing literal: TODO: what is this? + /// Timing literal: Backend-dependent unit. + /// Equivalent to the duration of one waveform sample on the backend. Dt, /// Timing literal: Nanoseconds. Ns, @@ -346,7 +347,7 @@ pub enum ClosedBinaryOperator { Star, /// `**` StarStar, - // TODO: missing Tilde according to qasm3Lexer.g4 to be able to express ~= + // Note: Missing Tilde according to qasm3Lexer.g4 to be able to express ~= // But this is this a bug in the official qasm lexer? } diff --git a/compiler/qsc_qasm3/src/lex/raw.rs b/compiler/qsc_qasm3/src/lex/raw.rs index 6be2cbda69..3d4393e95f 100644 --- a/compiler/qsc_qasm3/src/lex/raw.rs +++ b/compiler/qsc_qasm3/src/lex/raw.rs @@ -164,7 +164,8 @@ pub enum CommentKind { pub enum LiteralFragmentKind { /// Imaginary literal fragment. Imag, - /// Timing literal: TODO: what is this? + /// Timing literal: Backend-dependent unit. + /// Equivalent to the duration of one waveform sample on the backend. Dt, /// Timing literal: Nanoseconds. Ns, From 58c52c5440350e3459caea5f2fecde6c586262f6 Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Thu, 30 Jan 2025 10:38:36 -0800 Subject: [PATCH 13/20] move && and || to ClosedBinaryOp --- compiler/qsc_qasm3/src/keyword.rs | 88 ++++++++++------- compiler/qsc_qasm3/src/lex/cooked.rs | 107 +++++++++------------ compiler/qsc_qasm3/src/lex/cooked/tests.rs | 80 ++++++++------- 3 files changed, 140 insertions(+), 135 deletions(-) diff --git a/compiler/qsc_qasm3/src/keyword.rs b/compiler/qsc_qasm3/src/keyword.rs index 47392c615f..f762d2de18 100644 --- a/compiler/qsc_qasm3/src/keyword.rs +++ b/compiler/qsc_qasm3/src/keyword.rs @@ -9,36 +9,58 @@ use std::{ #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Sequence)] pub enum Keyword { - OpenQASM, - Include, - Defcalgrammar, - Def, - Cal, - Gate, - Extern, + Annotation, Box, - Let, - Break, + Cal, + Case, Continue, - If, + Def, + Default, + Defcalgrammar, Else, End, - Return, + Extern, For, - While, + Gate, + If, In, - Switch, - Case, - Default, - + Include, + Let, + OpenQASM, Pragma, - Annotation, + Return, + Switch, + While, } impl Keyword { pub(super) fn as_str(self) -> &'static str { - todo!() + match self { + Keyword::Annotation => "annotation", + Keyword::Box => "box", + Keyword::Break => "break", + Keyword::Cal => "cal", + Keyword::Case => "case", + Keyword::Continue => "continue", + Keyword::Def => "def", + Keyword::Default => "default", + Keyword::Defcalgrammar => "defcalgrammar", + Keyword::Else => "else", + Keyword::End => "end", + Keyword::Extern => "extern", + Keyword::For => "for", + Keyword::Gate => "gate", + Keyword::If => "if", + Keyword::In => "in", + Keyword::Include => "include", + Keyword::Let => "let", + Keyword::OpenQASM => "openqasm", + Keyword::Pragma => "pragma", + Keyword::Return => "return", + Keyword::Switch => "switch", + Keyword::While => "while", + } } } @@ -56,29 +78,29 @@ impl FromStr for Keyword { // frequency in Q# so that fewer comparisons are needed on 
average. fn from_str(s: &str) -> Result { match s { - "openqasm" => Ok(Self::OpenQASM), - "include" => Ok(Self::Include), - "defcalgrammar" => Ok(Self::Defcalgrammar), - "def" => Ok(Self::Def), - "cal" => Ok(Self::Cal), - "gate" => Ok(Self::Gate), - "extern" => Ok(Self::Extern), + "annotation" => Ok(Self::Annotation), "box" => Ok(Self::Box), - "let" => Ok(Self::Let), "break" => Ok(Self::Break), + "cal" => Ok(Self::Cal), + "case" => Ok(Self::Case), "continue" => Ok(Self::Continue), - "if" => Ok(Self::If), + "def" => Ok(Self::Def), + "default" => Ok(Self::Default), + "defcalgrammar" => Ok(Self::Defcalgrammar), "else" => Ok(Self::Else), "end" => Ok(Self::End), - "return" => Ok(Self::Return), + "extern" => Ok(Self::Extern), "for" => Ok(Self::For), - "while" => Ok(Self::While), + "gate" => Ok(Self::Gate), + "if" => Ok(Self::If), "in" => Ok(Self::In), - "switch" => Ok(Self::Switch), - "case" => Ok(Self::Case), - "default" => Ok(Self::Default), + "include" => Ok(Self::Include), + "let" => Ok(Self::Let), + "openqasm" => Ok(Self::OpenQASM), "pragma" => Ok(Self::Pragma), - "annotation" => Ok(Self::Annotation), + "return" => Ok(Self::Return), + "switch" => Ok(Self::Switch), + "while" => Ok(Self::While), _ => Err(()), } } diff --git a/compiler/qsc_qasm3/src/lex/cooked.rs b/compiler/qsc_qasm3/src/lex/cooked.rs index 962b9d7e01..7e32d0096f 100644 --- a/compiler/qsc_qasm3/src/lex/cooked.rs +++ b/compiler/qsc_qasm3/src/lex/cooked.rs @@ -120,11 +120,15 @@ pub enum TokenKind { Arrow, // Operators, - UnaryOperator(UnaryOperator), - BinaryOperator(ClosedBinaryOperator), - BinaryOperatorEq(ClosedBinaryOperator), + ClosedBinaryOp(ClosedBinaryOp), + BinaryOperatorEq(ClosedBinaryOp), ComparisonOperator(ComparisonOperator), + /// `=` Eq, + /// `!` + Bang, + /// `~` + Tilde, Identifier, HardwareQubit, @@ -159,11 +163,12 @@ impl Display for TokenKind { TokenKind::Comma => write!(f, "`,`"), TokenKind::PlusPlus => write!(f, "`++`"), TokenKind::Arrow => write!(f, "`->`"), - TokenKind::UnaryOperator(op) => write!(f, "`{op}`"), - TokenKind::BinaryOperator(op) => write!(f, "`{op}`"), + TokenKind::ClosedBinaryOp(op) => write!(f, "`{op}`"), TokenKind::BinaryOperatorEq(op) => write!(f, "`{op}=`"), TokenKind::ComparisonOperator(op) => write!(f, "`{op}`"), TokenKind::Eq => write!(f, "`=`"), + TokenKind::Bang => write!(f, "`!`"), + TokenKind::Tilde => write!(f, "`~`"), TokenKind::Identifier => write!(f, "identifier"), TokenKind::HardwareQubit => write!(f, "hardware bit"), } @@ -300,35 +305,19 @@ pub enum TimingLiteralKind { S, } -#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub enum UnaryOperator { - /// `!` - Bang, - /// `-` - Minus, - /// `~` - Tilde, -} - -impl Display for UnaryOperator { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - f.write_str(match self { - UnaryOperator::Bang => "!", - UnaryOperator::Minus => "-", - UnaryOperator::Tilde => "~", - }) - } -} - /// A binary operator that returns the same type as the type of its first operand; in other words, /// the domain of the first operand is closed under this operation. These are candidates for /// compound assignment operators, like `+=`. #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub enum ClosedBinaryOperator { +pub enum ClosedBinaryOp { /// `&` Amp, + /// `&&` + AmpAmp, /// `|` Bar, + /// `||` + BarBar, /// `^` Caret, /// `>>` @@ -351,32 +340,30 @@ pub enum ClosedBinaryOperator { // But this is this a bug in the official qasm lexer? 
} -impl Display for ClosedBinaryOperator { +impl Display for ClosedBinaryOp { fn fmt(&self, f: &mut Formatter) -> fmt::Result { f.write_str(match self { - ClosedBinaryOperator::Amp => "&", - ClosedBinaryOperator::Bar => "|", - ClosedBinaryOperator::Caret => "^", - ClosedBinaryOperator::GtGt => ">>", - ClosedBinaryOperator::LtLt => "<<", - ClosedBinaryOperator::Minus => "-", - ClosedBinaryOperator::Percent => "%", - ClosedBinaryOperator::Plus => "+", - ClosedBinaryOperator::Slash => "/", - ClosedBinaryOperator::Star => "*", - ClosedBinaryOperator::StarStar => "**", + ClosedBinaryOp::Amp => "&", + ClosedBinaryOp::AmpAmp => "&&", + ClosedBinaryOp::Bar => "|", + ClosedBinaryOp::BarBar => "||", + ClosedBinaryOp::Caret => "^", + ClosedBinaryOp::GtGt => ">>", + ClosedBinaryOp::LtLt => "<<", + ClosedBinaryOp::Minus => "-", + ClosedBinaryOp::Percent => "%", + ClosedBinaryOp::Plus => "+", + ClosedBinaryOp::Slash => "/", + ClosedBinaryOp::Star => "*", + ClosedBinaryOp::StarStar => "**", }) } } #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] pub enum ComparisonOperator { - /// `&&` - AmpAmp, /// `!=` BangEq, - /// `||` - BarBar, /// `==` EqEq, /// `>` @@ -392,9 +379,7 @@ pub enum ComparisonOperator { impl Display for ComparisonOperator { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { f.write_str(match self { - ComparisonOperator::AmpAmp => "&&", ComparisonOperator::BangEq => "!=", - ComparisonOperator::BarBar => "||", ComparisonOperator::EqEq => "==", ComparisonOperator::Gt => ">", ComparisonOperator::GtEq => ">=", @@ -562,9 +547,9 @@ impl<'a> Lexer<'a> { match single { Single::Amp => { if self.next_if_eq_single(Single::Amp) { - Ok(TokenKind::ComparisonOperator(ComparisonOperator::AmpAmp)) + Ok(TokenKind::ClosedBinaryOp(ClosedBinaryOp::AmpAmp)) } else { - Ok(self.closed_bin_op(ClosedBinaryOperator::Amp)) + Ok(self.closed_bin_op(ClosedBinaryOp::Amp)) } } Single::At => { @@ -580,17 +565,17 @@ impl<'a> Lexer<'a> { if self.next_if_eq_single(Single::Eq) { Ok(TokenKind::ComparisonOperator(ComparisonOperator::BangEq)) } else { - Ok(TokenKind::UnaryOperator(UnaryOperator::Bang)) + Ok(TokenKind::Bang) } } Single::Bar => { if self.next_if_eq_single(Single::Bar) { - Ok(TokenKind::ComparisonOperator(ComparisonOperator::BarBar)) + Ok(TokenKind::ClosedBinaryOp(ClosedBinaryOp::BarBar)) } else { - Ok(self.closed_bin_op(ClosedBinaryOperator::Bar)) + Ok(self.closed_bin_op(ClosedBinaryOp::Bar)) } } - Single::Caret => Ok(self.closed_bin_op(ClosedBinaryOperator::Caret)), + Single::Caret => Ok(self.closed_bin_op(ClosedBinaryOp::Caret)), Single::Close(delim) => Ok(TokenKind::Close(delim)), Single::Colon => Ok(TokenKind::Colon), Single::Comma => Ok(TokenKind::Comma), @@ -606,7 +591,7 @@ impl<'a> Lexer<'a> { if self.next_if_eq_single(Single::Eq) { Ok(TokenKind::ComparisonOperator(ComparisonOperator::GtEq)) } else if self.next_if_eq_single(Single::Gt) { - Ok(self.closed_bin_op(ClosedBinaryOperator::GtGt)) + Ok(self.closed_bin_op(ClosedBinaryOp::GtGt)) } else { Ok(TokenKind::ComparisonOperator(ComparisonOperator::Gt)) } @@ -615,7 +600,7 @@ impl<'a> Lexer<'a> { if self.next_if_eq_single(Single::Eq) { Ok(TokenKind::ComparisonOperator(ComparisonOperator::LtEq)) } else if self.next_if_eq_single(Single::Lt) { - Ok(self.closed_bin_op(ClosedBinaryOperator::LtLt)) + Ok(self.closed_bin_op(ClosedBinaryOp::LtLt)) } else { Ok(TokenKind::ComparisonOperator(ComparisonOperator::Lt)) } @@ -624,36 +609,36 @@ impl<'a> Lexer<'a> { if self.next_if_eq_single(Single::Gt) { Ok(TokenKind::Arrow) } else { - 
Ok(self.closed_bin_op(ClosedBinaryOperator::Minus)) + Ok(self.closed_bin_op(ClosedBinaryOp::Minus)) } } Single::Open(delim) => Ok(TokenKind::Open(delim)), - Single::Percent => Ok(self.closed_bin_op(ClosedBinaryOperator::Percent)), + Single::Percent => Ok(self.closed_bin_op(ClosedBinaryOp::Percent)), Single::Plus => { if self.next_if_eq_single(Single::Plus) { Ok(TokenKind::PlusPlus) } else { - Ok(self.closed_bin_op(ClosedBinaryOperator::Plus)) + Ok(self.closed_bin_op(ClosedBinaryOp::Plus)) } } Single::Semi => Ok(TokenKind::Semicolon), - Single::Slash => Ok(self.closed_bin_op(ClosedBinaryOperator::Slash)), + Single::Slash => Ok(self.closed_bin_op(ClosedBinaryOp::Slash)), Single::Star => { if self.next_if_eq_single(Single::Star) { - Ok(self.closed_bin_op(ClosedBinaryOperator::StarStar)) + Ok(self.closed_bin_op(ClosedBinaryOp::StarStar)) } else { - Ok(self.closed_bin_op(ClosedBinaryOperator::Star)) + Ok(self.closed_bin_op(ClosedBinaryOp::Star)) } } - Single::Tilde => Ok(TokenKind::UnaryOperator(UnaryOperator::Tilde)), + Single::Tilde => Ok(TokenKind::Tilde), } } - fn closed_bin_op(&mut self, op: ClosedBinaryOperator) -> TokenKind { + fn closed_bin_op(&mut self, op: ClosedBinaryOp) -> TokenKind { if self.next_if_eq_single(Single::Eq) { TokenKind::BinaryOperatorEq(op) } else { - TokenKind::BinaryOperator(op) + TokenKind::ClosedBinaryOp(op) } } diff --git a/compiler/qsc_qasm3/src/lex/cooked/tests.rs b/compiler/qsc_qasm3/src/lex/cooked/tests.rs index b5297ef55c..820cc96df9 100644 --- a/compiler/qsc_qasm3/src/lex/cooked/tests.rs +++ b/compiler/qsc_qasm3/src/lex/cooked/tests.rs @@ -20,32 +20,36 @@ fn op_string(kind: TokenKind) -> Option { TokenKind::Comma => Some(",".to_string()), TokenKind::Dot => Some(".".to_string()), TokenKind::Eq => Some("=".to_string()), + TokenKind::Bang => Some("!".to_string()), + TokenKind::Tilde => Some("~".to_string()), TokenKind::Open(Delim::Brace) => Some("{".to_string()), TokenKind::Open(Delim::Bracket) => Some("[".to_string()), TokenKind::Open(Delim::Paren) => Some("(".to_string()), TokenKind::PlusPlus => Some("++".to_string()), - TokenKind::Keyword(_) => None, - TokenKind::Type(_) => todo!(), - TokenKind::GPhase => todo!(), - TokenKind::Inv => todo!(), - TokenKind::Pow => todo!(), - TokenKind::Ctrl => todo!(), - TokenKind::NegCtrl => todo!(), - TokenKind::Dim => todo!(), - TokenKind::DurationOf => todo!(), - TokenKind::Delay => todo!(), - TokenKind::Reset => todo!(), - TokenKind::Measure => todo!(), - TokenKind::Barrier => todo!(), - TokenKind::Literal(literal) => todo!(), - TokenKind::Semicolon => todo!(), - TokenKind::Arrow => todo!(), - TokenKind::UnaryOperator(unary_operator) => todo!(), - TokenKind::BinaryOperator(closed_binary_operator) => todo!(), - TokenKind::BinaryOperatorEq(closed_binary_operator) => todo!(), - TokenKind::ComparisonOperator(comparison_operator) => todo!(), - TokenKind::Identifier => todo!(), - TokenKind::HardwareQubit => todo!(), + TokenKind::Keyword(keyword) => Some(keyword.to_string()), + TokenKind::Type(type_) => Some(type_.to_string()), + TokenKind::GPhase => Some("gphase".to_string()), + TokenKind::Inv => Some("inv".to_string()), + TokenKind::Pow => Some("pow".to_string()), + TokenKind::Ctrl => Some("ctrl".to_string()), + TokenKind::NegCtrl => Some("negctrl".to_string()), + TokenKind::Dim => Some("dim".to_string()), + TokenKind::DurationOf => Some("durationof".to_string()), + TokenKind::Delay => Some("delay".to_string()), + TokenKind::Reset => Some("reset".to_string()), + TokenKind::Measure => Some("measure".to_string()), + 
TokenKind::Barrier => Some("barrier".to_string()), + TokenKind::Semicolon => Some(";".to_string()), + TokenKind::Arrow => Some("->".to_string()), + TokenKind::ClosedBinaryOp(op) => Some(op.to_string()), + TokenKind::BinaryOperatorEq( + super::ClosedBinaryOp::AmpAmp | super::ClosedBinaryOp::BarBar, + ) + | TokenKind::Literal(_) => None, + TokenKind::BinaryOperatorEq(op) => Some(format!("{op}=")), + TokenKind::ComparisonOperator(op) => Some(op.to_string()), + TokenKind::Identifier => Some("foo".to_string()), + TokenKind::HardwareQubit => Some("$1".to_string()), } } @@ -86,19 +90,16 @@ fn amp() { "&", &expect![[r#" [ - Err( - IncompleteEof( - Single( + Ok( + Token { + kind: BinaryOperator( Amp, ), - ClosedBinOp( - AmpAmpAmp, - ), - Span { - lo: 1, + span: Span { + lo: 0, hi: 1, }, - ), + }, ), ] "#]], @@ -111,19 +112,16 @@ fn amp_amp() { "&&", &expect![[r#" [ - Err( - IncompleteEof( - Single( - Amp, - ), - ClosedBinOp( - AmpAmpAmp, + Ok( + Token { + kind: ClosedBinaryOp( + AmpAmp, ), - Span { - lo: 2, + span: Span { + lo: 0, hi: 2, }, - ), + }, ), ] "#]], From ae88ec1dcbebd4f59dfa03f0ce63ee1bbc3948e5 Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Thu, 30 Jan 2025 11:08:21 -0800 Subject: [PATCH 14/20] update cooked unit tests --- compiler/qsc_qasm3/src/lex/cooked.rs | 144 +-- compiler/qsc_qasm3/src/lex/cooked/tests.rs | 1093 +++----------------- 2 files changed, 227 insertions(+), 1010 deletions(-) diff --git a/compiler/qsc_qasm3/src/lex/cooked.rs b/compiler/qsc_qasm3/src/lex/cooked.rs index 7e32d0096f..63eae6c144 100644 --- a/compiler/qsc_qasm3/src/lex/cooked.rs +++ b/compiler/qsc_qasm3/src/lex/cooked.rs @@ -120,9 +120,9 @@ pub enum TokenKind { Arrow, // Operators, - ClosedBinaryOp(ClosedBinaryOp), - BinaryOperatorEq(ClosedBinaryOp), - ComparisonOperator(ComparisonOperator), + ClosedBinOp(ClosedBinOp), + BinOpEq(ClosedBinOp), + ComparisonOp(ComparisonOp), /// `=` Eq, /// `!` @@ -163,9 +163,9 @@ impl Display for TokenKind { TokenKind::Comma => write!(f, "`,`"), TokenKind::PlusPlus => write!(f, "`++`"), TokenKind::Arrow => write!(f, "`->`"), - TokenKind::ClosedBinaryOp(op) => write!(f, "`{op}`"), - TokenKind::BinaryOperatorEq(op) => write!(f, "`{op}=`"), - TokenKind::ComparisonOperator(op) => write!(f, "`{op}`"), + TokenKind::ClosedBinOp(op) => write!(f, "`{op}`"), + TokenKind::BinOpEq(op) => write!(f, "`{op}=`"), + TokenKind::ComparisonOp(op) => write!(f, "`{op}`"), TokenKind::Eq => write!(f, "`=`"), TokenKind::Bang => write!(f, "`!`"), TokenKind::Tilde => write!(f, "`~`"), @@ -309,7 +309,7 @@ pub enum TimingLiteralKind { /// the domain of the first operand is closed under this operation. These are candidates for /// compound assignment operators, like `+=`. #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub enum ClosedBinaryOp { +pub enum ClosedBinOp { /// `&` Amp, /// `&&` @@ -340,28 +340,28 @@ pub enum ClosedBinaryOp { // But this is this a bug in the official qasm lexer? 
} -impl Display for ClosedBinaryOp { +impl Display for ClosedBinOp { fn fmt(&self, f: &mut Formatter) -> fmt::Result { f.write_str(match self { - ClosedBinaryOp::Amp => "&", - ClosedBinaryOp::AmpAmp => "&&", - ClosedBinaryOp::Bar => "|", - ClosedBinaryOp::BarBar => "||", - ClosedBinaryOp::Caret => "^", - ClosedBinaryOp::GtGt => ">>", - ClosedBinaryOp::LtLt => "<<", - ClosedBinaryOp::Minus => "-", - ClosedBinaryOp::Percent => "%", - ClosedBinaryOp::Plus => "+", - ClosedBinaryOp::Slash => "/", - ClosedBinaryOp::Star => "*", - ClosedBinaryOp::StarStar => "**", + ClosedBinOp::Amp => "&", + ClosedBinOp::AmpAmp => "&&", + ClosedBinOp::Bar => "|", + ClosedBinOp::BarBar => "||", + ClosedBinOp::Caret => "^", + ClosedBinOp::GtGt => ">>", + ClosedBinOp::LtLt => "<<", + ClosedBinOp::Minus => "-", + ClosedBinOp::Percent => "%", + ClosedBinOp::Plus => "+", + ClosedBinOp::Slash => "/", + ClosedBinOp::Star => "*", + ClosedBinOp::StarStar => "**", }) } } #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub enum ComparisonOperator { +pub enum ComparisonOp { /// `!=` BangEq, /// `==` @@ -376,15 +376,15 @@ pub enum ComparisonOperator { LtEq, } -impl Display for ComparisonOperator { +impl Display for ComparisonOp { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { f.write_str(match self { - ComparisonOperator::BangEq => "!=", - ComparisonOperator::EqEq => "==", - ComparisonOperator::Gt => ">", - ComparisonOperator::GtEq => ">=", - ComparisonOperator::Lt => "<", - ComparisonOperator::LtEq => "<=", + ComparisonOp::BangEq => "!=", + ComparisonOp::EqEq => "==", + ComparisonOp::Gt => ">", + ComparisonOp::GtEq => ">=", + ComparisonOp::Lt => "<", + ComparisonOp::LtEq => "<=", }) } } @@ -493,23 +493,31 @@ impl<'a> Lexer<'a> { raw::TokenKind::Number(number) => { // after reading a decimal number or a float there could be a whitespace // followed by a fragment, which will change the type of the literal. 
- if let ( - Some(raw::TokenKind::Whitespace), - Some(raw::TokenKind::LiteralFragment(fragment)), - ) = (self.first(), self.second()) - { - use self::Literal::{Imaginary, Timing}; - use TokenKind::Literal; - Ok(Some(match fragment { - raw::LiteralFragmentKind::Imag => Literal(Imaginary), - raw::LiteralFragmentKind::Dt => Literal(Timing(TimingLiteralKind::Dt)), - raw::LiteralFragmentKind::Ns => Literal(Timing(TimingLiteralKind::Ns)), - raw::LiteralFragmentKind::Us => Literal(Timing(TimingLiteralKind::Us)), - raw::LiteralFragmentKind::Ms => Literal(Timing(TimingLiteralKind::Ms)), - raw::LiteralFragmentKind::S => Literal(Timing(TimingLiteralKind::S)), - })) - } else { - Ok(Some(number.into())) + match (self.first(), self.second()) { + (Some(raw::TokenKind::LiteralFragment(fragment)), _) + | ( + Some(raw::TokenKind::Whitespace), + Some(raw::TokenKind::LiteralFragment(fragment)), + ) => { + use self::Literal::{Imaginary, Timing}; + use TokenKind::Literal; + + // if first() was a whitespace, we need to consume an extra token + if self.first() == Some(raw::TokenKind::Whitespace) { + self.next(); + } + self.next(); + + Ok(Some(match fragment { + raw::LiteralFragmentKind::Imag => Literal(Imaginary), + raw::LiteralFragmentKind::Dt => Literal(Timing(TimingLiteralKind::Dt)), + raw::LiteralFragmentKind::Ns => Literal(Timing(TimingLiteralKind::Ns)), + raw::LiteralFragmentKind::Us => Literal(Timing(TimingLiteralKind::Us)), + raw::LiteralFragmentKind::Ms => Literal(Timing(TimingLiteralKind::Ms)), + raw::LiteralFragmentKind::S => Literal(Timing(TimingLiteralKind::S)), + })) + } + _ => Ok(Some(number.into())), } } raw::TokenKind::Single(single) => self.single(single).map(Some), @@ -547,9 +555,9 @@ impl<'a> Lexer<'a> { match single { Single::Amp => { if self.next_if_eq_single(Single::Amp) { - Ok(TokenKind::ClosedBinaryOp(ClosedBinaryOp::AmpAmp)) + Ok(TokenKind::ClosedBinOp(ClosedBinOp::AmpAmp)) } else { - Ok(self.closed_bin_op(ClosedBinaryOp::Amp)) + Ok(self.closed_bin_op(ClosedBinOp::Amp)) } } Single::At => { @@ -563,82 +571,82 @@ impl<'a> Lexer<'a> { } Single::Bang => { if self.next_if_eq_single(Single::Eq) { - Ok(TokenKind::ComparisonOperator(ComparisonOperator::BangEq)) + Ok(TokenKind::ComparisonOp(ComparisonOp::BangEq)) } else { Ok(TokenKind::Bang) } } Single::Bar => { if self.next_if_eq_single(Single::Bar) { - Ok(TokenKind::ClosedBinaryOp(ClosedBinaryOp::BarBar)) + Ok(TokenKind::ClosedBinOp(ClosedBinOp::BarBar)) } else { - Ok(self.closed_bin_op(ClosedBinaryOp::Bar)) + Ok(self.closed_bin_op(ClosedBinOp::Bar)) } } - Single::Caret => Ok(self.closed_bin_op(ClosedBinaryOp::Caret)), + Single::Caret => Ok(self.closed_bin_op(ClosedBinOp::Caret)), Single::Close(delim) => Ok(TokenKind::Close(delim)), Single::Colon => Ok(TokenKind::Colon), Single::Comma => Ok(TokenKind::Comma), Single::Dot => Ok(TokenKind::Dot), Single::Eq => { if self.next_if_eq_single(Single::Eq) { - Ok(TokenKind::ComparisonOperator(ComparisonOperator::EqEq)) + Ok(TokenKind::ComparisonOp(ComparisonOp::EqEq)) } else { Ok(TokenKind::Eq) } } Single::Gt => { if self.next_if_eq_single(Single::Eq) { - Ok(TokenKind::ComparisonOperator(ComparisonOperator::GtEq)) + Ok(TokenKind::ComparisonOp(ComparisonOp::GtEq)) } else if self.next_if_eq_single(Single::Gt) { - Ok(self.closed_bin_op(ClosedBinaryOp::GtGt)) + Ok(self.closed_bin_op(ClosedBinOp::GtGt)) } else { - Ok(TokenKind::ComparisonOperator(ComparisonOperator::Gt)) + Ok(TokenKind::ComparisonOp(ComparisonOp::Gt)) } } Single::Lt => { if self.next_if_eq_single(Single::Eq) { - 
Ok(TokenKind::ComparisonOperator(ComparisonOperator::LtEq)) + Ok(TokenKind::ComparisonOp(ComparisonOp::LtEq)) } else if self.next_if_eq_single(Single::Lt) { - Ok(self.closed_bin_op(ClosedBinaryOp::LtLt)) + Ok(self.closed_bin_op(ClosedBinOp::LtLt)) } else { - Ok(TokenKind::ComparisonOperator(ComparisonOperator::Lt)) + Ok(TokenKind::ComparisonOp(ComparisonOp::Lt)) } } Single::Minus => { if self.next_if_eq_single(Single::Gt) { Ok(TokenKind::Arrow) } else { - Ok(self.closed_bin_op(ClosedBinaryOp::Minus)) + Ok(self.closed_bin_op(ClosedBinOp::Minus)) } } Single::Open(delim) => Ok(TokenKind::Open(delim)), - Single::Percent => Ok(self.closed_bin_op(ClosedBinaryOp::Percent)), + Single::Percent => Ok(self.closed_bin_op(ClosedBinOp::Percent)), Single::Plus => { if self.next_if_eq_single(Single::Plus) { Ok(TokenKind::PlusPlus) } else { - Ok(self.closed_bin_op(ClosedBinaryOp::Plus)) + Ok(self.closed_bin_op(ClosedBinOp::Plus)) } } Single::Semi => Ok(TokenKind::Semicolon), - Single::Slash => Ok(self.closed_bin_op(ClosedBinaryOp::Slash)), + Single::Slash => Ok(self.closed_bin_op(ClosedBinOp::Slash)), Single::Star => { if self.next_if_eq_single(Single::Star) { - Ok(self.closed_bin_op(ClosedBinaryOp::StarStar)) + Ok(self.closed_bin_op(ClosedBinOp::StarStar)) } else { - Ok(self.closed_bin_op(ClosedBinaryOp::Star)) + Ok(self.closed_bin_op(ClosedBinOp::Star)) } } Single::Tilde => Ok(TokenKind::Tilde), } } - fn closed_bin_op(&mut self, op: ClosedBinaryOp) -> TokenKind { + fn closed_bin_op(&mut self, op: ClosedBinOp) -> TokenKind { if self.next_if_eq_single(Single::Eq) { - TokenKind::BinaryOperatorEq(op) + TokenKind::BinOpEq(op) } else { - TokenKind::ClosedBinaryOp(op) + TokenKind::ClosedBinOp(op) } } diff --git a/compiler/qsc_qasm3/src/lex/cooked/tests.rs b/compiler/qsc_qasm3/src/lex/cooked/tests.rs index 820cc96df9..f2f228ed92 100644 --- a/compiler/qsc_qasm3/src/lex/cooked/tests.rs +++ b/compiler/qsc_qasm3/src/lex/cooked/tests.rs @@ -41,13 +41,11 @@ fn op_string(kind: TokenKind) -> Option { TokenKind::Barrier => Some("barrier".to_string()), TokenKind::Semicolon => Some(";".to_string()), TokenKind::Arrow => Some("->".to_string()), - TokenKind::ClosedBinaryOp(op) => Some(op.to_string()), - TokenKind::BinaryOperatorEq( - super::ClosedBinaryOp::AmpAmp | super::ClosedBinaryOp::BarBar, - ) + TokenKind::ClosedBinOp(op) => Some(op.to_string()), + TokenKind::BinOpEq(super::ClosedBinOp::AmpAmp | super::ClosedBinOp::BarBar) | TokenKind::Literal(_) => None, - TokenKind::BinaryOperatorEq(op) => Some(format!("{op}=")), - TokenKind::ComparisonOperator(op) => Some(op.to_string()), + TokenKind::BinOpEq(op) => Some(format!("{op}=")), + TokenKind::ComparisonOp(op) => Some(op.to_string()), TokenKind::Identifier => Some("foo".to_string()), TokenKind::HardwareQubit => Some("$1".to_string()), } @@ -92,7 +90,7 @@ fn amp() { [ Ok( Token { - kind: BinaryOperator( + kind: ClosedBinOp( Amp, ), span: Span { @@ -114,7 +112,7 @@ fn amp_amp() { [ Ok( Token { - kind: ClosedBinaryOp( + kind: ClosedBinOp( AmpAmp, ), span: Span { @@ -134,22 +132,16 @@ fn amp_plus() { "&+", &expect![[r#" [ - Err( - Incomplete( - Single( + Ok( + Token { + kind: ClosedBinOp( Amp, ), - ClosedBinOp( - AmpAmpAmp, - ), - Single( - Plus, - ), - Span { - lo: 1, - hi: 2, + span: Span { + lo: 0, + hi: 1, }, - ), + }, ), Ok( Token { @@ -175,7 +167,7 @@ fn amp_multibyte() { [ Ok( Token { - kind: BinaryOperator( + kind: ClosedBinOp( Amp, ), span: Span { @@ -207,22 +199,22 @@ fn amp_amp_amp_amp() { Ok( Token { kind: ClosedBinOp( - AmpAmpAmp, + AmpAmp, ), span: Span { lo: 0, - 
hi: 3, + hi: 2, }, }, ), Ok( Token { kind: ClosedBinOp( - AmpAmpAmp, + AmpAmp, ), span: Span { - lo: 3, - hi: 6, + lo: 2, + hi: 4, }, }, ), @@ -239,8 +231,10 @@ fn int() { [ Ok( Token { - kind: Int( - Decimal, + kind: Literal( + Integer( + Decimal, + ), ), span: Span { lo: 0, @@ -272,8 +266,10 @@ fn negative_int() { ), Ok( Token { - kind: Int( - Decimal, + kind: Literal( + Integer( + Decimal, + ), ), span: Span { lo: 1, @@ -305,8 +301,10 @@ fn positive_int() { ), Ok( Token { - kind: Int( - Decimal, + kind: Literal( + Integer( + Decimal, + ), ), span: Span { lo: 1, @@ -320,19 +318,19 @@ fn positive_int() { } #[test] -fn bigint() { +fn imag() { check( - "123L", + "123im", &expect![[r#" [ Ok( Token { - kind: BigInt( - Decimal, + kind: Literal( + Imaginary, ), span: Span { lo: 0, - hi: 4, + hi: 5, }, }, ), @@ -342,9 +340,31 @@ fn bigint() { } #[test] -fn negative_bigint() { +fn imag_with_whitespace() { + check( + "123 im", + &expect![[r#" + [ + Ok( + Token { + kind: Literal( + Imaginary, + ), + span: Span { + lo: 0, + hi: 6, + }, + }, + ), + ] + "#]], + ); +} + +#[test] +fn negative_imag() { check( - "-123L", + "-123im", &expect![[r#" [ Ok( @@ -360,12 +380,12 @@ fn negative_bigint() { ), Ok( Token { - kind: BigInt( - Decimal, + kind: Literal( + Imaginary, ), span: Span { lo: 1, - hi: 5, + hi: 6, }, }, ), @@ -375,9 +395,9 @@ fn negative_bigint() { } #[test] -fn positive_bigint() { +fn positive_imag() { check( - "+123L", + "+123im", &expect![[r#" [ Ok( @@ -393,12 +413,12 @@ fn positive_bigint() { ), Ok( Token { - kind: BigInt( - Decimal, + kind: Literal( + Imaginary, ), span: Span { lo: 1, - hi: 5, + hi: 6, }, }, ), @@ -415,7 +435,9 @@ fn float() { [ Ok( Token { - kind: Float, + kind: Literal( + Float, + ), span: Span { lo: 0, hi: 4, @@ -446,7 +468,9 @@ fn negative_float() { ), Ok( Token { - kind: Float, + kind: Literal( + Float, + ), span: Span { lo: 1, hi: 5, @@ -477,7 +501,9 @@ fn positive_float() { ), Ok( Token { - kind: Float, + kind: Literal( + Float, + ), span: Span { lo: 1, hi: 5, @@ -497,20 +523,11 @@ fn leading_point() { [ Ok( Token { - kind: Dot, - span: Span { - lo: 0, - hi: 1, - }, - }, - ), - Ok( - Token { - kind: Int( - Decimal, + kind: Literal( + Float, ), span: Span { - lo: 1, + lo: 0, hi: 2, }, }, @@ -528,7 +545,9 @@ fn trailing_point() { [ Ok( Token { - kind: Float, + kind: Literal( + Float, + ), span: Span { lo: 0, hi: 2, @@ -544,75 +563,15 @@ fn trailing_point() { fn leading_zero_float() { check( "0.42", - &expect![[r#" - [ - Ok( - Token { - kind: Float, - span: Span { - lo: 0, - hi: 4, - }, - }, - ), - ] - "#]], - ); -} - -#[test] -fn dot_dot_int() { - check( - "..1", &expect![[r#" [ Ok( Token { - kind: DotDot, - span: Span { - lo: 0, - hi: 2, - }, - }, - ), - Ok( - Token { - kind: Int( - Decimal, + kind: Literal( + Float, ), - span: Span { - lo: 2, - hi: 3, - }, - }, - ), - ] - "#]], - ); -} - -#[test] -fn dot_dot_dot_int() { - check( - "...1", - &expect![[r#" - [ - Ok( - Token { - kind: DotDotDot, span: Span { lo: 0, - hi: 3, - }, - }, - ), - Ok( - Token { - kind: Int( - Decimal, - ), - span: Span { - lo: 3, hi: 4, }, }, @@ -623,16 +582,14 @@ fn dot_dot_dot_int() { } #[test] -fn int_dot_dot() { +fn dot_float() { check( - "1..", + "..1", &expect![[r#" [ Ok( Token { - kind: Int( - Decimal, - ), + kind: Dot, span: Span { lo: 0, hi: 1, @@ -641,7 +598,9 @@ fn int_dot_dot() { ), Ok( Token { - kind: DotDot, + kind: Literal( + Float, + ), span: Span { lo: 1, hi: 3, @@ -654,28 +613,28 @@ fn int_dot_dot() { } #[test] -fn int_dot_dot_dot() { +fn float_dot() { check( - "1...", + 
"1..", &expect![[r#" [ Ok( Token { - kind: Int( - Decimal, + kind: Literal( + Float, ), span: Span { lo: 0, - hi: 1, + hi: 2, }, }, ), Ok( Token { - kind: DotDotDot, + kind: Dot, span: Span { - lo: 1, - hi: 4, + lo: 2, + hi: 3, }, }, ), @@ -685,37 +644,37 @@ fn int_dot_dot_dot() { } #[test] -fn dot_dot_dot_int_dot_dot_dot() { +fn dot_dot_int_dot_dot() { check( - "...1...", + "..1..", &expect![[r#" [ Ok( Token { - kind: DotDotDot, + kind: Dot, span: Span { lo: 0, - hi: 3, + hi: 1, }, }, ), Ok( Token { - kind: Int( - Decimal, + kind: Literal( + Float, ), span: Span { - lo: 3, + lo: 1, hi: 4, }, }, ), Ok( Token { - kind: DotDotDot, + kind: Dot, span: Span { lo: 4, - hi: 7, + hi: 5, }, }, ), @@ -732,18 +691,11 @@ fn two_points_with_leading() { [ Ok( Token { - kind: Dot, + kind: Literal( + Float, + ), span: Span { lo: 0, - hi: 1, - }, - }, - ), - Ok( - Token { - kind: Float, - span: Span { - lo: 1, hi: 4, }, }, @@ -761,18 +713,11 @@ fn leading_point_exp() { [ Ok( Token { - kind: Dot, + kind: Literal( + Float, + ), span: Span { lo: 0, - hi: 1, - }, - }, - ), - Ok( - Token { - kind: Float, - span: Span { - lo: 1, hi: 4, }, }, @@ -790,7 +735,7 @@ fn ident() { [ Ok( Token { - kind: Ident, + kind: Identifier, span: Span { lo: 0, hi: 3, @@ -810,8 +755,8 @@ fn string() { [ Ok( Token { - kind: String( - Normal, + kind: Literal( + String, ), span: Span { lo: 0, @@ -832,8 +777,8 @@ fn string_empty() { [ Ok( Token { - kind: String( - Normal, + kind: Literal( + String, ), span: Span { lo: 0, @@ -866,41 +811,17 @@ fn string_missing_ending() { } #[test] -fn interpolated_string_missing_ending() { - check( - r#"$"string"#, - &expect![[r#" - [ - Err( - UnterminatedString( - Span { - lo: 0, - hi: 0, - }, - ), - ), - ] - "#]], - ); -} - -#[test] -fn interpolated_string() { +fn hardware_qubit() { check( - r#"$"string""#, + r"$12", &expect![[r#" [ Ok( Token { - kind: String( - Interpolated( - DollarQuote, - Quote, - ), - ), + kind: HardwareQubit, span: Span { lo: 0, - hi: 9, + hi: 3, }, }, ), @@ -910,47 +831,28 @@ fn interpolated_string() { } #[test] -fn interpolated_string_braced() { +fn unknown() { check( - r#"$"{x}""#, + "##", &expect![[r#" [ - Ok( - Token { - kind: String( - Interpolated( - DollarQuote, - LBrace, - ), - ), - span: Span { + Err( + Unknown( + '#', + Span { lo: 0, - hi: 3, - }, - }, - ), - Ok( - Token { - kind: Ident, - span: Span { - lo: 3, - hi: 4, + hi: 1, }, - }, + ), ), - Ok( - Token { - kind: String( - Interpolated( - RBrace, - Quote, - ), - ), - span: Span { - lo: 4, - hi: 6, + Err( + Unknown( + '#', + Span { + lo: 1, + hi: 2, }, - }, + ), ), ] "#]], @@ -958,22 +860,17 @@ fn interpolated_string_braced() { } #[test] -fn interpolated_string_escape_brace() { +fn comment() { check( - r#"$"\{""#, + "//comment\nx", &expect![[r#" [ Ok( Token { - kind: String( - Interpolated( - DollarQuote, - Quote, - ), - ), + kind: Identifier, span: Span { - lo: 0, - hi: 5, + lo: 10, + hi: 11, }, }, ), @@ -983,22 +880,17 @@ fn interpolated_string_escape_brace() { } #[test] -fn interpolated_string_unclosed_brace() { +fn block_comment() { check( - r#"$"{"#, + "/*comment*/x", &expect![[r#" [ Ok( Token { - kind: String( - Interpolated( - DollarQuote, - LBrace, - ), - ), + kind: Identifier, span: Span { - lo: 0, - hi: 3, + lo: 11, + hi: 12, }, }, ), @@ -1008,543 +900,14 @@ fn interpolated_string_unclosed_brace() { } #[test] -fn interpolated_string_unclosed_brace_quote() { +fn comment_four_slashes() { check( - r#"$"{""#, + "////comment\nx", &expect![[r#" [ Ok( Token { - kind: String( - Interpolated( - DollarQuote, - 
LBrace, - ), - ), - span: Span { - lo: 0, - hi: 3, - }, - }, - ), - Err( - UnterminatedString( - Span { - lo: 3, - hi: 3, - }, - ), - ), - ] - "#]], - ); -} - -#[test] -fn interpolated_string_unopened_brace() { - check( - r#"$"}"#, - &expect![[r#" - [ - Err( - UnterminatedString( - Span { - lo: 0, - hi: 0, - }, - ), - ), - ] - "#]], - ); -} - -#[test] -fn interpolated_string_unopened_brace_quote() { - check( - r#"$"}""#, - &expect![[r#" - [ - Ok( - Token { - kind: String( - Interpolated( - DollarQuote, - Quote, - ), - ), - span: Span { - lo: 0, - hi: 4, - }, - }, - ), - ] - "#]], - ); -} - -#[test] -fn interpolated_string_braced_index() { - check( - r#"$"{xs[0]}""#, - &expect![[r#" - [ - Ok( - Token { - kind: String( - Interpolated( - DollarQuote, - LBrace, - ), - ), - span: Span { - lo: 0, - hi: 3, - }, - }, - ), - Ok( - Token { - kind: Ident, - span: Span { - lo: 3, - hi: 5, - }, - }, - ), - Ok( - Token { - kind: Open( - Bracket, - ), - span: Span { - lo: 5, - hi: 6, - }, - }, - ), - Ok( - Token { - kind: Int( - Decimal, - ), - span: Span { - lo: 6, - hi: 7, - }, - }, - ), - Ok( - Token { - kind: Close( - Bracket, - ), - span: Span { - lo: 7, - hi: 8, - }, - }, - ), - Ok( - Token { - kind: String( - Interpolated( - RBrace, - Quote, - ), - ), - span: Span { - lo: 8, - hi: 10, - }, - }, - ), - ] - "#]], - ); -} - -#[test] -fn interpolated_string_two_braced() { - check( - r#"$"{x} {y}""#, - &expect![[r#" - [ - Ok( - Token { - kind: String( - Interpolated( - DollarQuote, - LBrace, - ), - ), - span: Span { - lo: 0, - hi: 3, - }, - }, - ), - Ok( - Token { - kind: Ident, - span: Span { - lo: 3, - hi: 4, - }, - }, - ), - Ok( - Token { - kind: String( - Interpolated( - RBrace, - LBrace, - ), - ), - span: Span { - lo: 4, - hi: 7, - }, - }, - ), - Ok( - Token { - kind: Ident, - span: Span { - lo: 7, - hi: 8, - }, - }, - ), - Ok( - Token { - kind: String( - Interpolated( - RBrace, - Quote, - ), - ), - span: Span { - lo: 8, - hi: 10, - }, - }, - ), - ] - "#]], - ); -} - -#[test] -fn interpolated_string_braced_normal_string() { - check( - r#"$"{"{}"}""#, - &expect![[r#" - [ - Ok( - Token { - kind: String( - Interpolated( - DollarQuote, - LBrace, - ), - ), - span: Span { - lo: 0, - hi: 3, - }, - }, - ), - Ok( - Token { - kind: String( - Normal, - ), - span: Span { - lo: 3, - hi: 7, - }, - }, - ), - Ok( - Token { - kind: String( - Interpolated( - RBrace, - Quote, - ), - ), - span: Span { - lo: 7, - hi: 9, - }, - }, - ), - ] - "#]], - ); -} - -#[test] -fn nested_interpolated_string() { - check( - r#"$"{$"{x}"}""#, - &expect![[r#" - [ - Ok( - Token { - kind: String( - Interpolated( - DollarQuote, - LBrace, - ), - ), - span: Span { - lo: 0, - hi: 3, - }, - }, - ), - Ok( - Token { - kind: String( - Interpolated( - DollarQuote, - LBrace, - ), - ), - span: Span { - lo: 3, - hi: 6, - }, - }, - ), - Ok( - Token { - kind: Ident, - span: Span { - lo: 6, - hi: 7, - }, - }, - ), - Ok( - Token { - kind: String( - Interpolated( - RBrace, - Quote, - ), - ), - span: Span { - lo: 7, - hi: 9, - }, - }, - ), - Ok( - Token { - kind: String( - Interpolated( - RBrace, - Quote, - ), - ), - span: Span { - lo: 9, - hi: 11, - }, - }, - ), - ] - "#]], - ); -} - -#[test] -fn nested_interpolated_string_with_exprs() { - check( - r#"$"foo {x + $"bar {y}"} baz""#, - &expect![[r#" - [ - Ok( - Token { - kind: String( - Interpolated( - DollarQuote, - LBrace, - ), - ), - span: Span { - lo: 0, - hi: 7, - }, - }, - ), - Ok( - Token { - kind: Ident, - span: Span { - lo: 7, - hi: 8, - }, - }, - ), - Ok( - Token { - kind: ClosedBinOp( - Plus, 
- ), - span: Span { - lo: 9, - hi: 10, - }, - }, - ), - Ok( - Token { - kind: String( - Interpolated( - DollarQuote, - LBrace, - ), - ), - span: Span { - lo: 11, - hi: 18, - }, - }, - ), - Ok( - Token { - kind: Ident, - span: Span { - lo: 18, - hi: 19, - }, - }, - ), - Ok( - Token { - kind: String( - Interpolated( - RBrace, - Quote, - ), - ), - span: Span { - lo: 19, - hi: 21, - }, - }, - ), - Ok( - Token { - kind: String( - Interpolated( - RBrace, - Quote, - ), - ), - span: Span { - lo: 21, - hi: 27, - }, - }, - ), - ] - "#]], - ); -} - -#[test] -fn unknown() { - check( - "##", - &expect![[r#" - [ - Err( - Unknown( - '#', - Span { - lo: 0, - hi: 1, - }, - ), - ), - Err( - Unknown( - '#', - Span { - lo: 1, - hi: 2, - }, - ), - ), - ] - "#]], - ); -} - -#[test] -fn comment() { - check( - "//comment\nx", - &expect![[r#" - [ - Ok( - Token { - kind: Ident, - span: Span { - lo: 10, - hi: 11, - }, - }, - ), - ] - "#]], - ); -} - -#[test] -fn doc_comment() { - check( - "///comment\nx", - &expect![[r#" - [ - Ok( - Token { - kind: DocComment, - span: Span { - lo: 0, - hi: 10, - }, - }, - ), - Ok( - Token { - kind: Ident, - span: Span { - lo: 11, - hi: 12, - }, - }, - ), - ] - "#]], - ); -} - -#[test] -fn comment_four_slashes() { - check( - "////comment\nx", - &expect![[r#" - [ - Ok( - Token { - kind: Ident, + kind: Identifier, span: Span { lo: 12, hi: 13, @@ -1555,157 +918,3 @@ fn comment_four_slashes() { "#]], ); } - -#[test] -fn unfinished_generic() { - check( - "' T", - &expect![[r#" - [ - Err( - Incomplete( - Ident, - AposIdent, - Whitespace, - Span { - lo: 1, - hi: 3, - }, - ), - ), - Ok( - Token { - kind: Ident, - span: Span { - lo: 3, - hi: 4, - }, - }, - ), - ] - "#]], - ); -} -#[test] -fn unfinished_generic_2() { - check( - "'// test - T", - &expect![[r#" - [ - Err( - Incomplete( - Ident, - AposIdent, - Comment( - Normal, - ), - Span { - lo: 1, - hi: 8, - }, - ), - ), - Ok( - Token { - kind: Ident, - span: Span { - lo: 18, - hi: 19, - }, - }, - ), - ] - "#]], - ); -} - -#[test] -fn unfinished_generic_3() { - check( - "' T", - &expect![[r#" - [ - Err( - Incomplete( - Ident, - AposIdent, - Whitespace, - Span { - lo: 1, - hi: 5, - }, - ), - ), - Ok( - Token { - kind: Ident, - span: Span { - lo: 5, - hi: 6, - }, - }, - ), - ] - "#]], - ); -} -#[test] -fn correct_generic() { - check( - "'T", - &expect![[r#" - [ - Ok( - Token { - kind: AposIdent, - span: Span { - lo: 0, - hi: 2, - }, - }, - ), - ] - "#]], - ); -} -#[test] -fn generic_missing_ident() { - check( - "'", - &expect![[r#" - [ - Err( - IncompleteEof( - Ident, - AposIdent, - Span { - lo: 1, - hi: 1, - }, - ), - ), - ] - "#]], - ); -} - -#[test] -fn generic_underscore_name() { - check( - "'_", - &expect![[r#" - [ - Ok( - Token { - kind: AposIdent, - span: Span { - lo: 0, - hi: 2, - }, - }, - ), - ] - "#]], - ); -} From 9f379db471382a6dde432686e263324f15b95c12 Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Thu, 30 Jan 2025 11:18:40 -0800 Subject: [PATCH 15/20] allow unused functions in the meantime --- compiler/qsc_qasm3/src/lex.rs | 2 +- compiler/qsc_qasm3/src/lex/cooked.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/compiler/qsc_qasm3/src/lex.rs b/compiler/qsc_qasm3/src/lex.rs index df04e5d460..34683c5259 100644 --- a/compiler/qsc_qasm3/src/lex.rs +++ b/compiler/qsc_qasm3/src/lex.rs @@ -1,9 +1,9 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
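// Note: the lexer is not consumed by a parser yet, so unused-code warnings are
// silenced below for the time being (per this commit: "allow unused functions in the meantime").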
+#![allow(unused)] pub mod cooked; pub mod raw; - use enum_iterator::Sequence; pub(super) use cooked::{Error, Lexer, Token, TokenKind}; diff --git a/compiler/qsc_qasm3/src/lex/cooked.rs b/compiler/qsc_qasm3/src/lex/cooked.rs index 63eae6c144..5b9c41fcf7 100644 --- a/compiler/qsc_qasm3/src/lex/cooked.rs +++ b/compiler/qsc_qasm3/src/lex/cooked.rs @@ -459,7 +459,7 @@ impl<'a> Lexer<'a> { let mut iter = tokens.iter(); while self.next_if_eq(*(iter.next().expect("tokens should have at least one token"))) { for token in iter { - self.expect(*token, complete)? + self.expect(*token, complete)?; } iter = tokens.iter(); } @@ -482,7 +482,7 @@ impl<'a> Lexer<'a> { } raw::TokenKind::Ident => { let ident = &self.input[(token.offset as usize)..(self.offset() as usize)]; - Ok(Some(self.ident(ident))) + Ok(Some(Self::ident(ident))) } raw::TokenKind::HardwareQubit => Ok(Some(TokenKind::HardwareQubit)), raw::TokenKind::LiteralFragment(_) => { @@ -650,7 +650,7 @@ impl<'a> Lexer<'a> { } } - fn ident(&mut self, ident: &str) -> TokenKind { + fn ident(ident: &str) -> TokenKind { match ident { "gphase" => TokenKind::GPhase, "inv" => TokenKind::Inv, From 4b236d146dd7252104ed9e787901e1c6bf3cf515 Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Thu, 30 Jan 2025 19:45:57 -0800 Subject: [PATCH 16/20] fix hardware_qubits, floats, and leading underscores --- compiler/qsc_qasm3/src/lex/cooked/tests.rs | 20 ++ compiler/qsc_qasm3/src/lex/raw.rs | 208 ++++++++++++----- compiler/qsc_qasm3/src/lex/raw/tests.rs | 260 ++++++++++++++++++++- 3 files changed, 435 insertions(+), 53 deletions(-) diff --git a/compiler/qsc_qasm3/src/lex/cooked/tests.rs b/compiler/qsc_qasm3/src/lex/cooked/tests.rs index f2f228ed92..cd11ad2a5c 100644 --- a/compiler/qsc_qasm3/src/lex/cooked/tests.rs +++ b/compiler/qsc_qasm3/src/lex/cooked/tests.rs @@ -665,6 +665,15 @@ fn dot_dot_int_dot_dot() { ), span: Span { lo: 1, + hi: 3, + }, + }, + ), + Ok( + Token { + kind: Dot, + span: Span { + lo: 3, hi: 4, }, }, @@ -696,6 +705,17 @@ fn two_points_with_leading() { ), span: Span { lo: 0, + hi: 2, + }, + }, + ), + Ok( + Token { + kind: Literal( + Float, + ), + span: Span { + lo: 2, hi: 4, }, }, diff --git a/compiler/qsc_qasm3/src/lex/raw.rs b/compiler/qsc_qasm3/src/lex/raw.rs index 3d4393e95f..36b6459ad5 100644 --- a/compiler/qsc_qasm3/src/lex/raw.rs +++ b/compiler/qsc_qasm3/src/lex/raw.rs @@ -23,6 +23,17 @@ use std::{ str::CharIndices, }; +/// An enum used internally by the raw lexer to signal whether +/// a token was partially parsed or if it wasn't parsed at all. +enum LexError { + /// An incomplete token was parsed, e.g., a string missing + /// the closing quote or a number ending in an underscore. + Incomplete(T), + /// The token wasn't parsed and no characters were consumed + /// when trying to parse the token. + None, +} + /// A raw token. #[derive(Clone, Debug, Eq, PartialEq)] pub struct Token { @@ -200,6 +211,10 @@ impl<'a> Lexer<'a> { } } + fn next_if(&mut self, f: impl FnOnce(char) -> bool) -> bool { + self.chars.next_if(|i| f(i.1)).is_some() + } + fn next_if_eq(&mut self, c: char) -> bool { self.chars.next_if(|i| i.1 == c).is_some() } @@ -265,16 +280,15 @@ impl<'a> Lexer<'a> { } fn ident(&mut self, c: char) -> Option { - let first = self.first(); - let second = self.second(); - // Check for some special literal fragments. 
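        // Literal fragments are the suffixes that may follow a number: the timing units
        // `dt`, `ns`, `us`, `ms`, `s`, and the imaginary marker `im` (see `LiteralFragmentKind`).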
+ let first = self.first(); if c == 's' && (first.is_none() || first.is_some_and(|c1| c1 != '_' && !c1.is_alphanumeric())) { return Some(TokenKind::LiteralFragment(LiteralFragmentKind::S)); } + let second = self.second(); if let Some(c1) = first { if second.is_none() || second.is_some_and(|c1| c1 != '_' && !c1.is_alphanumeric()) { let fragment = match (c, c1) { @@ -288,7 +302,7 @@ impl<'a> Lexer<'a> { if fragment.is_some() { // consume `first` before returning. - self.next(); + self.chars.next(); return fragment; } } @@ -302,24 +316,84 @@ impl<'a> Lexer<'a> { } } - fn number(&mut self, c: char) -> Option { - self.leading_zero(c).or_else(|| self.decimal(c)) + fn leading_underscore(&mut self) -> TokenKind { + self.eat_while(|c| c == '_'); + + match self.chars.next() { + None => TokenKind::Ident, + Some((_, c)) => { + if c.is_alphabetic() { + TokenKind::Ident + } else if c.is_ascii() { + match self.number(c) { + Ok(number) => match number { + Number::Float | Number::Int(Radix::Decimal) => { + TokenKind::Number(number) + } + // Binary, Octal, and Hexadecimal literals can't be prefixed by underscores + // Therefore if you read something like `___0b11`, it is an identifier. + // Therefore, we read the rest of it and return `TokenKind::Ident`. + Number::Int(_) => { + self.eat_while(|c| c == '_' || c.is_alphanumeric()); + TokenKind::Ident + } + }, + Err(LexError::None) => { + unreachable!("the first character is a number, this case is impossible") + } + Err(LexError::Incomplete(_)) => TokenKind::Unknown, + } + } else { + TokenKind::Unknown + } + } + } } - fn leading_dot(&mut self, c: char) -> bool { - if c == '.' && self.first().is_some_and(|c| char::is_ascii_digit(&c)) { - self.next(); - self.eat_while(|c| c == '_' || c.is_ascii_digit()); - self.exp(); - true + fn number(&mut self, c: char) -> Result> { + self.leading_zero(c) + .or_else(|_| self.leading_dot(c)) + .or_else(|_| self.decimal_or_float(c)) + } + + fn leading_dot(&mut self, c: char) -> Result> { + let first = self.first(); + if c == '.' && first.is_some_and(|c| c == '_' || c.is_ascii_digit()) { + self.chars.next(); + let c1 = first.expect("first.is_some_and() succeeded"); + self.decimal(c1)?; + match self.exp() { + Ok(()) | Err(LexError::None) => Ok(Number::Float), + Err(_) => Err(LexError::Incomplete(Number::Float)), + } } else { - false + Err(LexError::None) } } - fn leading_zero(&mut self, c: char) -> Option { + fn mid_dot(&mut self, c: char) -> Result> { + if c == '.' 
{ + match self.first() { + Some(c1) if c1 == '_' || c1.is_ascii_digit() => { + self.chars.next(); + match self.decimal(c1) { + Err(LexError::Incomplete(_)) => Err(LexError::Incomplete(Number::Float)), + Ok(_) | Err(LexError::None) => match self.exp() { + Ok(()) | Err(LexError::None) => Ok(Number::Float), + Err(_) => Err(LexError::Incomplete(Number::Float)), + }, + } + } + None | Some(_) => Ok(Number::Float), + } + } else { + Err(LexError::None) + } + } + + fn leading_zero(&mut self, c: char) -> Result> { if c != '0' { - return None; + return Err(LexError::None); } let radix = if self.next_if_eq('b') || self.next_if_eq('B') { @@ -332,47 +406,75 @@ impl<'a> Lexer<'a> { Radix::Decimal }; - self.eat_while(|c| c == '_' || c.is_digit(radix.into())); - if radix == Radix::Decimal && self.float() { - Some(Number::Float) - } else { - Some(Number::Int(radix)) + let last_eaten = self.eat_while(|c| c == '_' || c.is_digit(radix.into())); + + match radix { + Radix::Binary | Radix::Octal | Radix::Hexadecimal => match last_eaten { + None | Some('_') => Err(LexError::Incomplete(Number::Int(radix))), + _ => Ok(Number::Int(radix)), + }, + Radix::Decimal => match self.first() { + Some(c1 @ '.') => { + self.chars.next(); + self.mid_dot(c1) + } + None | Some(_) => Ok(Number::Int(Radix::Decimal)), + }, } } - fn decimal(&mut self, c: char) -> Option { - if !c.is_ascii_digit() { - return None; + /// Parses a decimal integer. + /// TODO: add .g4 pattern + fn decimal(&mut self, c: char) -> Result> { + if c != '_' && !c.is_ascii_digit() { + return Err(LexError::None); } - self.eat_while(|c| c == '_' || c.is_ascii_digit()); + let last_eaten = self.eat_while(|c| c == '_' || c.is_ascii_digit()); - if self.float() { - Some(Number::Float) - } else { - Some(Number::Int(Radix::Decimal)) + match last_eaten { + None if c == '_' => Err(LexError::None), + Some('_') => Err(LexError::Incomplete(Number::Int(Radix::Decimal))), + _ => Ok(Number::Int(Radix::Decimal)), } } - fn float(&mut self) -> bool { - // Watch out for ranges: `0..` should be an integer followed by two dots. - if self.first() == Some('.') { - self.chars.next(); - self.eat_while(|c| c == '_' || c.is_ascii_digit()); - self.exp(); - true - } else { - self.exp() + fn decimal_or_float(&mut self, c: char) -> Result> { + self.decimal(c)?; + match self.first() { + None => Ok(Number::Int(Radix::Decimal)), + Some(first @ '.') => { + self.chars.next(); + self.mid_dot(first) + } + _ => match self.exp() { + Ok(()) => Ok(Number::Float), + Err(LexError::None) => Ok(Number::Int(Radix::Decimal)), + Err(_) => Err(LexError::Incomplete(Number::Float)), + }, } } - fn exp(&mut self) -> bool { - if self.next_if_eq('e') || self.next_if_eq('E') { + /// Parses an exponent. + fn exp(&mut self) -> Result<(), LexError> { + if self.next_if(|c| c == 'e' || c == 'E') { + // Optionally there could be a + or - sign. self.chars.next_if(|i| i.1 == '+' || i.1 == '-'); - self.eat_while(|c| c.is_ascii_digit()); - true + + // If the next character isn't a digit or an + // underscore we issue an error without consuming it. 
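            // e.g. `1e5` and `1e+5` lex as floats, while `1e` and `1e+` are incomplete
            // and end up as `TokenKind::Unknown` in the iterator below.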
+ let first = self.first().ok_or(LexError::Incomplete(Number::Float))?; + if first != '_' && !first.is_ascii_digit() { + Err(LexError::Incomplete(Number::Float)) + } else { + self.chars.next(); + match self.decimal(first) { + Ok(_) => Ok(()), + Err(_) => Err(LexError::Incomplete(Number::Float)), + } + } } else { - false + Err(LexError::None) } } @@ -383,7 +485,7 @@ impl<'a> Lexer<'a> { if let Some(bitstring) = self.bitstring() { // consume the closing '"' - self.next(); + self.chars.next(); return Some(bitstring); } @@ -421,8 +523,7 @@ impl<'a> Lexer<'a> { fn hardware_qubit(&mut self, c: char) -> bool { if c == '$' { - self.eat_while(|c| c.is_ascii_digit()); - true + self.eat_while(|c| c.is_ascii_digit()).is_some() } else { false } @@ -440,18 +541,21 @@ impl Iterator for Lexer<'_> { TokenKind::Whitespace } else if self.newline(c) { TokenKind::Newline + } else if c == '_' { + self.leading_underscore() } else if let Some(ident) = self.ident(c) { ident } else if self.hardware_qubit(c) { TokenKind::HardwareQubit - } else if self.leading_dot(c) { - TokenKind::Number(Number::Float) } else { - self.number(c) - .map(TokenKind::Number) - .or_else(|| self.string(c)) - .or_else(|| single(c).map(TokenKind::Single)) - .unwrap_or(TokenKind::Unknown) + match self.number(c) { + Ok(number) => TokenKind::Number(number), + Err(LexError::Incomplete(_)) => TokenKind::Unknown, + Err(LexError::None) => self + .string(c) + .or_else(|| single(c).map(TokenKind::Single)) + .unwrap_or(TokenKind::Unknown), + } }; let offset: u32 = offset.try_into().expect("offset should fit into u32"); Some(Token { diff --git a/compiler/qsc_qasm3/src/lex/raw/tests.rs b/compiler/qsc_qasm3/src/lex/raw/tests.rs index 4b2930a14c..854034ca68 100644 --- a/compiler/qsc_qasm3/src/lex/raw/tests.rs +++ b/compiler/qsc_qasm3/src/lex/raw/tests.rs @@ -323,7 +323,11 @@ fn number_underscore_prefix() { &expect![[r#" [ Token { - kind: Ident, + kind: Number( + Int( + Decimal, + ), + ), offset: 0, }, ] @@ -354,6 +358,46 @@ fn float_dot() { ); } +#[test] +fn float_dot2() { + check( + ".0.", + &expect![[r#" + [ + Token { + kind: Number( + Float, + ), + offset: 0, + }, + Token { + kind: Single( + Dot, + ), + offset: 2, + }, + ] + "#]], + ); +} + +#[test] +fn leading_dot_float() { + check( + ".0", + &expect![[r#" + [ + Token { + kind: Number( + Float, + ), + offset: 0, + }, + ] + "#]], + ); +} + #[test] fn dot_float() { check( @@ -875,3 +919,217 @@ fn identifiers_with_fragment_prefixes() { "#]], ); } + +#[test] +fn leading_underscores_decimal() { + check( + "___3", + &expect![[r#" + [ + Token { + kind: Number( + Int( + Decimal, + ), + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn leading_underscores_float() { + check( + "___3.", + &expect![[r#" + [ + Token { + kind: Number( + Float, + ), + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn leading_underscores_binary() { + check( + "___0b11", + &expect![[r#" + [ + Token { + kind: Ident, + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn leading_underscores_binary_extended() { + check( + "___0b11abc", + &expect![[r#" + [ + Token { + kind: Ident, + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn leading_underscores_identifier() { + check( + "___a", + &expect![[r#" + [ + Token { + kind: Ident, + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn hardware_qubit() { + check( + "$12", + &expect![[r#" + [ + Token { + kind: HardwareQubit, + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn hardware_qubit_dot() { + check( + "$2.", + &expect![[r#" + [ + Token { + kind: HardwareQubit, + offset: 0, + }, + 
Token { + kind: Single( + Dot, + ), + offset: 2, + }, + ] + "#]], + ); +} + +#[test] +fn incomplete_hardware_qubit() { + check( + "$", + &expect![[r#" + [ + Token { + kind: Unknown, + offset: 0, + }, + ] + "#]], + ); +} + +#[test] +fn incomplete_hardware_qubit_identifier() { + check( + "$a", + &expect![[r#" + [ + Token { + kind: Unknown, + offset: 0, + }, + Token { + kind: Ident, + offset: 1, + }, + ] + "#]], + ); +} + +#[test] +fn incomplete_hardware_qubit_float() { + check( + "$.2", + &expect![[r#" + [ + Token { + kind: Unknown, + offset: 0, + }, + Token { + kind: Number( + Float, + ), + offset: 1, + }, + ] + "#]], + ); +} + +#[test] +fn hardware_qubit_with_underscore_at_end() { + check( + "$12_", + &expect![[r#" + [ + Token { + kind: HardwareQubit, + offset: 0, + }, + Token { + kind: Ident, + offset: 3, + }, + ] + "#]], + ); +} + +#[test] +fn hardware_qubit_with_underscore_in_the_middle() { + check( + "$12_3", + &expect![[r#" + [ + Token { + kind: HardwareQubit, + offset: 0, + }, + Token { + kind: Number( + Int( + Decimal, + ), + ), + offset: 3, + }, + ] + "#]], + ); +} From 998a515ec47aab400ed09e30713fda00a60655ce Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Thu, 30 Jan 2025 20:27:08 -0800 Subject: [PATCH 17/20] add comments to raw lexer --- compiler/qsc_qasm3/src/lex/raw.rs | 48 +++++++++++++++++++++---- compiler/qsc_qasm3/src/lex/raw/tests.rs | 2 +- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/compiler/qsc_qasm3/src/lex/raw.rs b/compiler/qsc_qasm3/src/lex/raw.rs index 36b6459ad5..d5e0fb67de 100644 --- a/compiler/qsc_qasm3/src/lex/raw.rs +++ b/compiler/qsc_qasm3/src/lex/raw.rs @@ -281,6 +281,8 @@ impl<'a> Lexer<'a> { fn ident(&mut self, c: char) -> Option { // Check for some special literal fragments. + // We need to check that the character following the fragment isn't an + // underscore or an alphanumeric character, else it is an identifier. let first = self.first(); if c == 's' && (first.is_none() || first.is_some_and(|c1| c1 != '_' && !c1.is_alphanumeric())) @@ -301,7 +303,7 @@ impl<'a> Lexer<'a> { }; if fragment.is_some() { - // consume `first` before returning. + // Consume `first` before returning. self.chars.next(); return fragment; } @@ -316,15 +318,22 @@ impl<'a> Lexer<'a> { } } + /// Qasm allows identifiers, decimal integers, and floats to start with + /// an underscore, so we need this rule to disambiguate those cases. fn leading_underscore(&mut self) -> TokenKind { + // First we eat through all the underscores. self.eat_while(|c| c == '_'); + // Then we check the first character following the underscores. match self.chars.next() { + // If we hit the EOF, it is an identifier, since we had at least one underscore. None => TokenKind::Ident, Some((_, c)) => { + // If it is alphabetic, it is an identifier. We read the rest of it and return. if c.is_alphabetic() { + self.eat_while(|c| c == '_' || c.is_alphanumeric()); TokenKind::Ident - } else if c.is_ascii() { + } else if c.is_ascii_digit() { match self.number(c) { Ok(number) => match number { Number::Float | Number::Int(Radix::Decimal) => { @@ -356,6 +365,9 @@ impl<'a> Lexer<'a> { .or_else(|_| self.decimal_or_float(c)) } + /// This rule allows us to differentiate a leading dot from a mid dot. + /// A float starting with a leading dot must contain at least one digit + /// after the dot. fn leading_dot(&mut self, c: char) -> Result> { let first = self.first(); if c == '.' 
&& first.is_some_and(|c| c == '_' || c.is_ascii_digit()) { @@ -371,6 +383,9 @@ impl<'a> Lexer<'a> { } } + /// A float with a middle dot could optionally contain numbers after the dot. + /// This rule is necessary to differentiate from the floats with a leading dot, + /// which must have digits after the dot. fn mid_dot(&mut self, c: char) -> Result> { if c == '.' { match self.first() { @@ -391,6 +406,9 @@ impl<'a> Lexer<'a> { } } + /// This rule parses binary, octal, hexadecimal numbers, or decimal/floats + /// if the next character isn't a radix specifier. + /// Numbers in Qasm aren't allowed to end in an underscore. fn leading_zero(&mut self, c: char) -> Result> { if c != '0' { return Err(LexError::None); @@ -423,8 +441,8 @@ impl<'a> Lexer<'a> { } } - /// Parses a decimal integer. - /// TODO: add .g4 pattern + /// This rule parses a decimal integer. + /// Numbers in QASM aren't allowed to end in an underscore. fn decimal(&mut self, c: char) -> Result> { if c != '_' && !c.is_ascii_digit() { return Err(LexError::None); @@ -439,6 +457,8 @@ impl<'a> Lexer<'a> { } } + /// This rule disambiguates between a decimal integer and a float with a + /// mid dot, like `12.3`. fn decimal_or_float(&mut self, c: char) -> Result> { self.decimal(c)?; match self.first() { @@ -455,7 +475,11 @@ impl<'a> Lexer<'a> { } } - /// Parses an exponent. + /// Parses an exponent. Errors if the exponent was missing or incomplete. + /// The rule `decimal_or_float` uses the `LexError::None` variant of the error + /// to classify the token as an integer. + /// The `leading_dot` and `mid_dot` rules use the `LexError::None` variant to + /// classify the token as a float. fn exp(&mut self) -> Result<(), LexError> { if self.next_if(|c| c == 'e' || c == 'E') { // Optionally there could be a + or - sign. @@ -478,13 +502,16 @@ impl<'a> Lexer<'a> { } } + /// Tries to parse a string or a bitstring. QASM strings can be enclosed + /// by double quotes or single quotes. Bitstrings can only be enclosed by + /// double quotes and contain 0s and 1s. fn string(&mut self, string_start: char) -> Option { if string_start != '"' && string_start != '\'' { return None; } if let Some(bitstring) = self.bitstring() { - // consume the closing '"' + // Try consuming the closing '"'. self.chars.next(); return Some(bitstring); } @@ -501,6 +528,8 @@ impl<'a> Lexer<'a> { }) } + /// Parses the body of a bitstring. Bitstrings can only contain 0s and 1s. + /// Returns `None` if it finds an invalid character. fn bitstring(&mut self) -> Option { const STRING_START: char = '"'; @@ -514,13 +543,18 @@ impl<'a> Lexer<'a> { return None; } + // Check the next character to determine if the bitstring is valid and closed, + // valid and open because we reached the EOF, or invalid, in which case we + // will treat it as a regular string. match self.first() { - None => Some(TokenKind::Bitstring { terminated: false }), Some(STRING_START) => Some(TokenKind::Bitstring { terminated: true }), + None => Some(TokenKind::Bitstring { terminated: false }), _ => None, } } + /// Tries parsing a hardware qubit literal, consisting of a `$` sign followed by + /// ASCII digits. 
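    /// For example `$0` and `$12` are hardware qubits, while a bare `$` or `$a` is not
    /// (see the `incomplete_hardware_qubit` tests in `raw/tests.rs`).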
fn hardware_qubit(&mut self, c: char) -> bool { if c == '$' { self.eat_while(|c| c.is_ascii_digit()).is_some() diff --git a/compiler/qsc_qasm3/src/lex/raw/tests.rs b/compiler/qsc_qasm3/src/lex/raw/tests.rs index 854034ca68..708966292c 100644 --- a/compiler/qsc_qasm3/src/lex/raw/tests.rs +++ b/compiler/qsc_qasm3/src/lex/raw/tests.rs @@ -989,7 +989,7 @@ fn leading_underscores_binary_extended() { #[test] fn leading_underscores_identifier() { check( - "___a", + "___abc", &expect![[r#" [ Token { From 5ca77b3f73e87d9329c5b7670cbbd90397f6c1af Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Fri, 31 Jan 2025 09:09:27 -0800 Subject: [PATCH 18/20] remove leading_underscore rule --- compiler/qsc_qasm3/src/lex/raw.rs | 47 ++----------------------- compiler/qsc_qasm3/src/lex/raw/tests.rs | 45 ++++++----------------- 2 files changed, 14 insertions(+), 78 deletions(-) diff --git a/compiler/qsc_qasm3/src/lex/raw.rs b/compiler/qsc_qasm3/src/lex/raw.rs index d5e0fb67de..65bf3d7fea 100644 --- a/compiler/qsc_qasm3/src/lex/raw.rs +++ b/compiler/qsc_qasm3/src/lex/raw.rs @@ -318,47 +318,6 @@ impl<'a> Lexer<'a> { } } - /// Qasm allows identifiers, decimal integers, and floats to start with - /// an underscore, so we need this rule to disambiguate those cases. - fn leading_underscore(&mut self) -> TokenKind { - // First we eat through all the underscores. - self.eat_while(|c| c == '_'); - - // Then we check the first character following the underscores. - match self.chars.next() { - // If we hit the EOF, it is an identifier, since we had at least one underscore. - None => TokenKind::Ident, - Some((_, c)) => { - // If it is alphabetic, it is an identifier. We read the rest of it and return. - if c.is_alphabetic() { - self.eat_while(|c| c == '_' || c.is_alphanumeric()); - TokenKind::Ident - } else if c.is_ascii_digit() { - match self.number(c) { - Ok(number) => match number { - Number::Float | Number::Int(Radix::Decimal) => { - TokenKind::Number(number) - } - // Binary, Octal, and Hexadecimal literals can't be prefixed by underscores - // Therefore if you read something like `___0b11`, it is an identifier. - // Therefore, we read the rest of it and return `TokenKind::Ident`. - Number::Int(_) => { - self.eat_while(|c| c == '_' || c.is_alphanumeric()); - TokenKind::Ident - } - }, - Err(LexError::None) => { - unreachable!("the first character is a number, this case is impossible") - } - Err(LexError::Incomplete(_)) => TokenKind::Unknown, - } - } else { - TokenKind::Unknown - } - } - } - } - fn number(&mut self, c: char) -> Result> { self.leading_zero(c) .or_else(|_| self.leading_dot(c)) @@ -443,8 +402,10 @@ impl<'a> Lexer<'a> { /// This rule parses a decimal integer. /// Numbers in QASM aren't allowed to end in an underscore. 
+ /// The rule in the .g4 file is + /// `DecimalIntegerLiteral: ([0-9] '_'?)* [0-9];` fn decimal(&mut self, c: char) -> Result> { - if c != '_' && !c.is_ascii_digit() { + if !c.is_ascii_digit() { return Err(LexError::None); } @@ -575,8 +536,6 @@ impl Iterator for Lexer<'_> { TokenKind::Whitespace } else if self.newline(c) { TokenKind::Newline - } else if c == '_' { - self.leading_underscore() } else if let Some(ident) = self.ident(c) { ident } else if self.hardware_qubit(c) { diff --git a/compiler/qsc_qasm3/src/lex/raw/tests.rs b/compiler/qsc_qasm3/src/lex/raw/tests.rs index 708966292c..96ff8383ec 100644 --- a/compiler/qsc_qasm3/src/lex/raw/tests.rs +++ b/compiler/qsc_qasm3/src/lex/raw/tests.rs @@ -316,25 +316,6 @@ fn number_seps() { ); } -#[test] -fn number_underscore_prefix() { - check( - "_123_456", - &expect![[r#" - [ - Token { - kind: Number( - Int( - Decimal, - ), - ), - offset: 0, - }, - ] - "#]], - ); -} - #[test] fn float_dot() { check( @@ -921,17 +902,13 @@ fn identifiers_with_fragment_prefixes() { } #[test] -fn leading_underscores_decimal() { +fn leading_underscores_digit() { check( "___3", &expect![[r#" [ Token { - kind: Number( - Int( - Decimal, - ), - ), + kind: Ident, offset: 0, }, ] @@ -940,17 +917,21 @@ fn leading_underscores_decimal() { } #[test] -fn leading_underscores_float() { +fn leading_underscores_ident_dot() { check( "___3.", &expect![[r#" [ Token { - kind: Number( - Float, - ), + kind: Ident, offset: 0, }, + Token { + kind: Single( + Dot, + ), + offset: 4, + }, ] "#]], ); @@ -1122,11 +1103,7 @@ fn hardware_qubit_with_underscore_in_the_middle() { offset: 0, }, Token { - kind: Number( - Int( - Decimal, - ), - ), + kind: Ident, offset: 3, }, ] From 0f00fb58a8c597e806d2bc4183112970b0aa83e0 Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Fri, 31 Jan 2025 09:16:40 -0800 Subject: [PATCH 19/20] remove commented out line --- compiler/qsc_qasm3/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/compiler/qsc_qasm3/Cargo.toml b/compiler/qsc_qasm3/Cargo.toml index 7a1f9f72b4..4be5b3092d 100644 --- a/compiler/qsc_qasm3/Cargo.toml +++ b/compiler/qsc_qasm3/Cargo.toml @@ -21,7 +21,6 @@ thiserror = { workspace = true } oq3_source_file = { workspace = true } oq3_syntax = { workspace = true } oq3_parser = { workspace = true } -# oq3_lexer = { workspace = true } oq3_semantics = { workspace = true } [dev-dependencies] From 6172c85d27717e96ea146b1b7ba3d5cdfab91ab6 Mon Sep 17 00:00:00 2001 From: Oscar Puente <156957451+orpuente-MS@users.noreply.github.com> Date: Fri, 31 Jan 2025 12:56:54 -0800 Subject: [PATCH 20/20] move `Annotation` from `Keyword` to `TokenKind`. --- compiler/qsc_qasm3/src/keyword.rs | 3 --- compiler/qsc_qasm3/src/lex/cooked.rs | 4 +++- compiler/qsc_qasm3/src/lex/cooked/tests.rs | 3 ++- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/compiler/qsc_qasm3/src/keyword.rs b/compiler/qsc_qasm3/src/keyword.rs index f762d2de18..dd30c8bd02 100644 --- a/compiler/qsc_qasm3/src/keyword.rs +++ b/compiler/qsc_qasm3/src/keyword.rs @@ -9,7 +9,6 @@ use std::{ #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Sequence)] pub enum Keyword { - Annotation, Box, Break, Cal, @@ -37,7 +36,6 @@ pub enum Keyword { impl Keyword { pub(super) fn as_str(self) -> &'static str { match self { - Keyword::Annotation => "annotation", Keyword::Box => "box", Keyword::Break => "break", Keyword::Cal => "cal", @@ -78,7 +76,6 @@ impl FromStr for Keyword { // frequency in Q# so that fewer comparisons are needed on average. 
fn from_str(s: &str) -> Result { match s { - "annotation" => Ok(Self::Annotation), "box" => Ok(Self::Box), "break" => Ok(Self::Break), "cal" => Ok(Self::Cal), diff --git a/compiler/qsc_qasm3/src/lex/cooked.rs b/compiler/qsc_qasm3/src/lex/cooked.rs index 5b9c41fcf7..faebc1bb28 100644 --- a/compiler/qsc_qasm3/src/lex/cooked.rs +++ b/compiler/qsc_qasm3/src/lex/cooked.rs @@ -81,6 +81,7 @@ impl Error { /// A token kind. #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] pub enum TokenKind { + Annotation, Keyword(Keyword), Type(Type), @@ -137,6 +138,7 @@ pub enum TokenKind { impl Display for TokenKind { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { + TokenKind::Annotation => write!(f, "annotation"), TokenKind::Keyword(keyword) => write!(f, "keyword `{keyword}`"), TokenKind::Type(type_) => write!(f, "keyword `{type_}`"), TokenKind::GPhase => write!(f, "gphase"), @@ -561,7 +563,7 @@ impl<'a> Lexer<'a> { } } Single::At => { - let complete = TokenKind::Keyword(Keyword::Annotation); + let complete = TokenKind::Annotation; self.expect(raw::TokenKind::Ident, complete)?; self.kleen_star( &[raw::TokenKind::Single(Single::Dot), raw::TokenKind::Ident], diff --git a/compiler/qsc_qasm3/src/lex/cooked/tests.rs b/compiler/qsc_qasm3/src/lex/cooked/tests.rs index cd11ad2a5c..7002c957b7 100644 --- a/compiler/qsc_qasm3/src/lex/cooked/tests.rs +++ b/compiler/qsc_qasm3/src/lex/cooked/tests.rs @@ -43,7 +43,8 @@ fn op_string(kind: TokenKind) -> Option { TokenKind::Arrow => Some("->".to_string()), TokenKind::ClosedBinOp(op) => Some(op.to_string()), TokenKind::BinOpEq(super::ClosedBinOp::AmpAmp | super::ClosedBinOp::BarBar) - | TokenKind::Literal(_) => None, + | TokenKind::Literal(_) + | TokenKind::Annotation => None, TokenKind::BinOpEq(op) => Some(format!("{op}=")), TokenKind::ComparisonOp(op) => Some(op.to_string()), TokenKind::Identifier => Some("foo".to_string()),