@@ -41,6 +41,12 @@ pub enum TokenizerErrorKind {
 
     #[error("Unexpected '{{' inside string interpolation")]
     UnexpectedCurlyInInterpolation,
+
+    #[error("Unexpected closing of scope other than the current scope")]
+    UnexpectedScopeClosing {
+        current_scope: Option<Scope>,
+        closing_scope_type: ScopeType,
+    },
 }
 
 #[derive(Debug, Error, PartialEq, Eq)]
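The new `UnexpectedScopeClosing` variant carries both the scope that is actually open and the one the input tried to close, which gives diagnostics full context. It reflects the stack discipline introduced further down in this patch: scopes close in LIFO order. A minimal, self-contained sketch of that invariant (illustrative only; the names mirror the patch, and the real check lives in `close_scope` below):

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ScopeType {
    OrdinaryCurly,
    Interpolation,
    String,
}

/// Closing succeeds only when the innermost open scope matches.
fn close(scopes: &mut Vec<ScopeType>, t: ScopeType) -> Result<(), String> {
    match scopes.last() {
        Some(&top) if top == t => {
            scopes.pop();
            Ok(())
        }
        top => Err(format!("cannot close {t:?}; current scope is {top:?}")),
    }
}

fn main() {
    // Scanning something like `"{…{…}…}"` opens String, then Interpolation,
    // then OrdinaryCurly; the closers must come back in reverse order.
    let mut scopes = vec![
        ScopeType::String,
        ScopeType::Interpolation,
        ScopeType::OrdinaryCurly,
    ];
    assert!(close(&mut scopes, ScopeType::OrdinaryCurly).is_ok());
    // Closing the string while the interpolation is still open is rejected.
    assert!(close(&mut scopes, ScopeType::String).is_err());
    assert!(close(&mut scopes, ScopeType::Interpolation).is_ok());
    assert!(close(&mut scopes, ScopeType::String).is_ok());
}
```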
@@ -223,21 +229,22 @@ fn is_identifier_continue(c: char) -> bool {
         && c != '⋅'
 }
 
+#[cfg_attr(debug_assertions, derive(Debug))]
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum ScopeType {
+    OrdinaryCurly,
+    Interpolation,
+    String,
+}
+
 /// When scanning a string interpolation like `"foo = {foo}, and bar = {bar}."`,
 /// the tokenizer needs to keep track of where it currently is, because we allow
 /// for (almost) arbitrary expressions inside the {…} part.
 #[cfg_attr(debug_assertions, derive(Debug))]
-enum InterpolationState {
-    /// We are not inside curly braces.
-    Outside,
-    /// We are currently scanning the inner part of an interpolation.
-    Inside,
-}
-
-impl InterpolationState {
-    fn is_inside(&self) -> bool {
-        matches!(self, InterpolationState::Inside)
-    }
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct Scope {
+    scope_type: ScopeType,
+    scope_start: ByteIndex,
 }
 
 #[cfg_attr(debug_assertions, derive(Debug))]
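`Scope` pairs the scope's kind with the byte offset where it was opened, so error spans can point back at the opening delimiter rather than only at the current position. A hedged sketch of the innermost-scope lookup this enables (`ByteIndex` reduced to a bare newtype here; the real method is `Tokenizer::scope_start` below):

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ByteIndex(usize);

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ScopeType {
    Interpolation,
    String,
}

#[derive(Debug, Clone, Copy)]
struct Scope {
    scope_type: ScopeType,
    scope_start: ByteIndex,
}

/// Start of the innermost open scope of the given type.
fn scope_start(scopes: &[Scope], t: ScopeType) -> Option<ByteIndex> {
    scopes
        .iter()
        .rev()
        .find(|s| s.scope_type == t)
        .map(|s| s.scope_start)
}

fn main() {
    // While scanning `"a{"b` the stack holds the outer string, the
    // interpolation, and an inner (still unterminated) string.
    let scopes = [
        Scope { scope_type: ScopeType::String, scope_start: ByteIndex(0) },
        Scope { scope_type: ScopeType::Interpolation, scope_start: ByteIndex(2) },
        Scope { scope_type: ScopeType::String, scope_start: ByteIndex(3) },
    ];
    // An unterminated-string error should point at byte 3, not byte 0.
    assert_eq!(scope_start(&scopes, ScopeType::String), Some(ByteIndex(3)));
}
```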
@@ -247,10 +254,8 @@ struct Tokenizer {
     token_start: ByteIndex,
     code_source_id: usize,
 
-    // Special fields / state for parsing string interpolations
-    string_start: ByteIndex,
-    interpolation_start: ByteIndex,
-    interpolation_state: InterpolationState,
+    // Special state for parsing nested strings and/or structs in string interpolations
+    scopes: Vec<Scope>,
 }
 
 fn char_at(s: &str, byte_index: usize) -> Option<char> {
@@ -265,9 +270,7 @@ impl Tokenizer {
             token_start: ByteIndex(0),
             code_source_id,
-            string_start: ByteIndex(0),
-            interpolation_start: ByteIndex(0),
-            interpolation_state: InterpolationState::Outside,
+            scopes: Vec::new(),
         }
     }
@@ -392,6 +395,58 @@ impl Tokenizer {
         Ok(())
     }
 
+    fn open_scope(&mut self, scope_type: ScopeType) -> Result<()> {
+        let new_scope = Scope {
+            scope_type,
+            scope_start: self.last,
+        };
+        self.scopes.push(new_scope);
+
+        Ok(())
+    }
+
+    fn close_scope(&mut self, scope_type: ScopeType) -> Result<Scope> {
+        if self.is_directly_inside(scope_type) {
+            let scope = self.scopes.pop().unwrap();
+            Ok(scope)
+        } else {
+            Err(TokenizerError {
+                kind: TokenizerErrorKind::UnexpectedScopeClosing {
+                    current_scope: self.scopes.last().copied(),
+                    closing_scope_type: scope_type,
+                },
+                span: Span {
+                    start: self.last,
+                    end: self.current,
+                    code_source_id: self.code_source_id,
+                },
+            })
+        }
+    }
+
+    fn is_directly_inside(&self, scope_type: ScopeType) -> bool {
+        self.scopes
+            .last()
+            .is_some_and(|scope| scope.scope_type == scope_type)
+    }
+
+    fn is_inside_child_of(&self, scope_type: ScopeType) -> bool {
+        let Some(i) = self.scopes.len().checked_sub(2) else {
+            return false;
+        };
+        self.scopes
+            .get(i)
+            .is_some_and(|scope| scope.scope_type == scope_type)
+    }
+
+    fn scope_start(&self, scope_type: ScopeType) -> Option<ByteIndex> {
+        self.scopes
+            .iter()
+            .filter(|scope| scope.scope_type == scope_type)
+            .last()
+            .map(|scope| scope.scope_start)
+    }
+
     fn scan_single_token<'a>(&mut self, input: &'a str) -> Result<Option<Token<'a>>> {
         fn is_ascii_hex_digit(c: char) -> bool {
             c.is_ascii_hexdigit()
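Note the two predicates: `is_directly_inside` inspects the top of the stack, while `is_inside_child_of` looks one level below it; the string-scanning arm further down uses the latter to tell an unterminated interpolation apart from a plain unterminated string. A small sketch of the difference (stack simplified to bare scope types):

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ScopeType {
    Interpolation,
    String,
}

/// Is the innermost open scope of this type? (cf. `is_directly_inside`)
fn is_directly_inside(scopes: &[ScopeType], t: ScopeType) -> bool {
    scopes.last() == Some(&t)
}

/// Is the scope one below the innermost of this type? (cf. `is_inside_child_of`)
fn is_inside_child_of(scopes: &[ScopeType], t: ScopeType) -> bool {
    scopes.len().checked_sub(2).is_some_and(|i| scopes[i] == t)
}

fn main() {
    // Scanning `"a{"b`: outer string, interpolation, inner string.
    let scopes = [ScopeType::String, ScopeType::Interpolation, ScopeType::String];
    assert!(is_directly_inside(&scopes, ScopeType::String));
    // The inner string was opened directly inside an interpolation, so
    // hitting end of input here is reported as an unterminated interpolation.
    assert!(is_inside_child_of(&scopes, ScopeType::Interpolation));
}
```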
@@ -476,8 +531,14 @@ impl Tokenizer {
             ')' => TokenKind::RightParen,
             '[' => TokenKind::LeftBracket,
             ']' => TokenKind::RightBracket,
-            '{' if !self.interpolation_state.is_inside() => TokenKind::LeftCurly,
-            '}' if !self.interpolation_state.is_inside() => TokenKind::RightCurly,
+            '{' => {
+                self.open_scope(ScopeType::OrdinaryCurly)?;
+                TokenKind::LeftCurly
+            }
+            '}' if self.is_directly_inside(ScopeType::OrdinaryCurly) => {
+                self.close_scope(ScopeType::OrdinaryCurly)?;
+                TokenKind::RightCurly
+            }
             '≤' => TokenKind::LessOrEqual,
             '<' if self.match_char(input, '=') => TokenKind::LessOrEqual,
             '<' => TokenKind::LessThan,
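With the guard on `'}'`, match-arm order does the dispatch: a closing brace first tries to end an ordinary `{…}` scope, and only otherwise falls through to the interpolation-closing arm later in the match. A hedged trace for an input shaped like `"a{ {1} }b"` (scope transitions only; the real tokenizer also emits the expression tokens in between):

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ScopeType {
    OrdinaryCurly,
    Interpolation,
    String,
}

fn main() {
    let mut scopes: Vec<ScopeType> = Vec::new();

    scopes.push(ScopeType::String); // opening `"`
    scopes.push(ScopeType::Interpolation); // `{` -> StringInterpolationStart

    // An inner `{` now opens an ordinary curly scope instead of being
    // rejected with UnexpectedCurlyInInterpolation as before this patch.
    scopes.push(ScopeType::OrdinaryCurly); // -> LeftCurly

    // The first `}` hits the guarded arm: the innermost scope is OrdinaryCurly.
    assert_eq!(scopes.last(), Some(&ScopeType::OrdinaryCurly));
    scopes.pop(); // -> RightCurly

    // The next `}` falls through to the interpolation arm instead.
    assert_eq!(scopes.last(), Some(&ScopeType::Interpolation));
    scopes.pop(); // closes the interpolation
    scopes.pop(); // the closing `"` pops the string scope -> StringInterpolationEnd
    assert!(scopes.is_empty());
}
```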
@@ -603,41 +664,52 @@ impl Tokenizer {
             '¹' | '²' | '³' | '⁴' | '⁵' | '⁶' | '⁷' | '⁸' | '⁹' => {
                 TokenKind::UnicodeExponent
             }
-            '"' => match self.interpolation_state {
-                InterpolationState::Outside => {
-                    self.string_start = self.token_start;
-
-                    self.consume_string(input)?;
-
-                    if self.match_char(input, '"') {
-                        TokenKind::StringFixed
-                    } else if self.match_char(input, '{') {
-                        self.interpolation_state = InterpolationState::Inside;
-                        self.interpolation_start = self.last;
-                        TokenKind::StringInterpolationStart
-                    } else {
-                        return Err(TokenizerError {
-                            kind: TokenizerErrorKind::UnterminatedString,
-                            span: Span {
-                                start: self.token_start,
-                                end: self.current,
-                                code_source_id: self.code_source_id,
-                            },
-                        });
-                    }
-                }
-                InterpolationState::Inside => {
+            '"' => {
+                self.open_scope(ScopeType::String)?;
+                self.consume_string(input)?;
+
+                if self.match_char(input, '"') {
+                    self.close_scope(ScopeType::String)?;
+                    TokenKind::StringFixed
+                } else if self.match_char(input, '{') {
+                    self.open_scope(ScopeType::Interpolation)?;
+                    TokenKind::StringInterpolationStart
+                } else if self.match_char(input, '}') {
+                    return Err(TokenizerError {
+                        kind: TokenizerErrorKind::UnterminatedString,
+                        span: Span {
+                            start: self
+                                .scope_start(ScopeType::String)
+                                .unwrap_or(self.token_start),
+                            end: self.current,
+                            code_source_id: self.code_source_id,
+                        },
+                    });
+                } else if self.is_inside_child_of(ScopeType::Interpolation) {
                     return Err(TokenizerError {
                         kind: TokenizerErrorKind::UnterminatedStringInterpolation,
                         span: Span {
-                            start: self.interpolation_start,
-                            end: self.last,
+                            start: self
+                                .scope_start(ScopeType::Interpolation)
+                                .unwrap_or(self.token_start),
+                            end: self.current,
+                            code_source_id: self.code_source_id,
+                        },
+                    });
+                } else {
+                    return Err(TokenizerError {
+                        kind: TokenizerErrorKind::UnterminatedString,
+                        span: Span {
+                            start: self
+                                .scope_start(ScopeType::String)
+                                .unwrap_or(self.token_start),
+                            end: self.current,
                             code_source_id: self.code_source_id,
                         },
                     });
                 }
-            },
-            ':' if self.interpolation_state.is_inside() => {
+            }
+            ':' if self.is_directly_inside(ScopeType::Interpolation) => {
                 while self
                     .peek(input)
                     .map(|c| c != '"' && c != '}')
@@ -669,32 +741,29 @@ impl Tokenizer {
                     });
                 }
             }
-            '}' if self.interpolation_state.is_inside() => {
+            '}' if self.is_directly_inside(ScopeType::Interpolation) => {
+                self.close_scope(ScopeType::Interpolation)?;
                 self.consume_string(input)?;
 
                 if self.match_char(input, '"') {
-                    self.interpolation_state = InterpolationState::Outside;
+                    self.close_scope(ScopeType::String)?;
                     TokenKind::StringInterpolationEnd
                 } else if self.match_char(input, '{') {
-                    self.interpolation_start = self.last;
+                    self.open_scope(ScopeType::Interpolation)?;
                     TokenKind::StringInterpolationMiddle
                 } else {
                     return Err(TokenizerError {
                         kind: TokenizerErrorKind::UnterminatedString,
                         span: Span {
-                            start: self.string_start,
+                            start: self
+                                .scope_start(ScopeType::String)
+                                .unwrap_or(self.token_start),
                             end: self.current,
                             code_source_id: self.code_source_id,
                         },
                     });
                 }
             }
-            '{' if self.interpolation_state.is_inside() => {
-                return Err(TokenizerError {
-                    kind: TokenizerErrorKind::UnexpectedCurlyInInterpolation,
-                    span: self.last.single_character_span(code_source_id),
-                });
-            }
             '…' => TokenKind::Ellipsis,
             c if is_identifier_start(c) => {
                 while self
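Taken together, the `'"'` and `'}'` arms mean a plain interpolated string walks the scope stack up and back down to empty. A runnable sketch of the transitions for the doc-comment example `"foo = {foo}, and bar = {bar}."` (scopes and interpolation tokens only; a sketch, not the tokenizer itself):

```rust
fn main() {
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    enum ScopeType {
        Interpolation,
        String,
    }

    let mut scopes = Vec::new();
    let mut tokens = Vec::new();

    scopes.push(ScopeType::String); // opening `"`
    scopes.push(ScopeType::Interpolation); // `{`
    tokens.push("StringInterpolationStart");

    scopes.pop(); // `}` closes the first interpolation…
    scopes.push(ScopeType::Interpolation); // …and the next `{` opens another
    tokens.push("StringInterpolationMiddle");

    scopes.pop(); // final `}`
    scopes.pop(); // closing `"` pops the string scope
    tokens.push("StringInterpolationEnd");

    assert!(scopes.is_empty());
    assert_eq!(
        tokens,
        [
            "StringInterpolationStart",
            "StringInterpolationMiddle",
            "StringInterpolationEnd"
        ]
    );
}
```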
@@ -1141,10 +1210,6 @@ fn test_tokenize_string() {
         tokenize("\"foo = {foo}.", 0).unwrap_err().kind,
         TokenizerErrorKind::UnterminatedString
     );
-    assert_eq!(
-        tokenize("\"foo = {foo, bar = {bar}\"", 0).unwrap_err().kind,
-        TokenizerErrorKind::UnexpectedCurlyInInterpolation
-    );
 
     insta::assert_snapshot!(
         tokenize_reduced_pretty(r#""start \"inner\" end""#).unwrap(),