Przeglądaj źródła

Improve reporting of parsing errors

David Peter 2 lat temu
rodzic
commit
2731a37ef4

+ 1 - 0
examples/parse_error/expected_digit.nbt

@@ -0,0 +1 @@
+1e-

+ 1 - 0
examples/parse_error/expected_digit_in_base.nbt

@@ -0,0 +1 @@
+0x1Z

+ 1 - 0
examples/parse_error/expected_equal_or_colon_after_let.nbt

@@ -0,0 +1 @@
+let foo 123

+ 1 - 0
examples/parse_error/expected_identifier_after_fn.nbt

@@ -0,0 +1 @@
+fn 123(x) = 3

+ 1 - 0
examples/parse_error/expected_identifier_after_let.nbt

@@ -0,0 +1 @@
+let 123 = 100

+ 1 - 0
examples/parse_error/expected_identifier_in_postfix_apply.nbt

@@ -0,0 +1 @@
+100 // 20

+ 1 - 0
examples/parse_error/expected_primary.nbt

@@ -0,0 +1 @@
+sqrt(123+)

+ 2 - 0
examples/parse_error/missing_closing_paren1.nbt

@@ -0,0 +1,2 @@
+@aliases(foo
+unit bar

+ 1 - 0
examples/parse_error/missing_closing_paren2.nbt

@@ -0,0 +1 @@
+sqrt(2

+ 1 - 0
examples/parse_error/missing_closing_paren3.nbt

@@ -0,0 +1 @@
+(2+3

+ 1 - 0
examples/parse_error/missing_closing_paren4.nbt

@@ -0,0 +1 @@
+dimension Foo = Bar / (Baz * Qux

+ 1 - 0
examples/parse_error/missing_closing_paren5.nbt

@@ -0,0 +1 @@
+dimension Foo = Bar^(-3 * Baz

+ 1 - 0
examples/parse_error/only_single_variadic_parameter.nbt

@@ -0,0 +1 @@
+fn foo(l: Length…, t: Time…) -> Scalar

+ 1 - 0
examples/parse_error/unexpected_character_in_identifier.nbt

@@ -0,0 +1 @@
+pi.3

+ 1 - 0
examples/parse_error/unexpected_character_in_negative_exponent.nbt

@@ -0,0 +1 @@
+2⁻3

+ 1 - 0
examples/parse_error/unexpected_character_in_number_literal.nbt

@@ -0,0 +1 @@
+2e-33.0

+ 1 - 0
examples/parse_error/unterminated_string.nbt

@@ -0,0 +1 @@
+foo("hello world

+ 2 - 2
numbat-cli/tests/integration.rs

@@ -20,7 +20,7 @@ fn pass_expression_on_command_line() {
         .arg("2 ++ 3")
         .assert()
         .failure()
-        .stderr(predicates::str::contains("Parse error"));
+        .stderr(predicates::str::contains("while parsing"));
 
     numbat()
         .arg("--expression")
@@ -45,7 +45,7 @@ fn read_code_from_file() {
         .arg("../examples/parse_error/trailing_characters.nbt")
         .assert()
         .failure()
-        .stderr(predicates::str::contains("Parse error"));
+        .stderr(predicates::str::contains("while parsing"));
 }
 
 #[test]

+ 18 - 14
numbat/src/parser.rs

@@ -64,19 +64,19 @@ pub enum ParseErrorKind {
     #[error("Expected identifier after 'let' keyword")]
     ExpectedIdentifierAfterLet,
 
-    #[error("Expected '=' or ':' after identifier in 'let' assignment")]
+    #[error("Expected '=' or ':' after identifier (and type annotation) in 'let' assignment")]
     ExpectedEqualOrColonAfterLetIdentifier,
 
     #[error("Expected identifier after 'fn' keyword. Note that some reserved words can not be used as function names.")]
     ExpectedIdentifierAfterFn,
 
-    #[error("Expected function name after '//' operator")]
+    #[error("Expected function name after '//' postfix apply operator")]
     ExpectedIdentifierInPostfixApply,
 
     #[error("Expected dimension identifier, '1', or opening parenthesis")]
     ExpectedDimensionPrimary,
 
-    #[error("Expected ',' or '>'")]
+    #[error("Expected ',' or '>' in type parameter list")]
     ExpectedCommaOrRightAngleBracket,
 
     #[error("Expected identifier (type parameter name)")]
@@ -94,7 +94,7 @@ pub enum ParseErrorKind {
     #[error("Only a single variadic parameter is allowed in a function definition")]
     OnlySingleVariadicParameter,
 
-    #[error("Variadic parameters are only allowed in foreign functions")]
+    #[error("Variadic parameters are only allowed in foreign functions (without body)")]
     VariadicParameterOnlyAllowedInForeignFunction,
 
     #[error("Expected identifier (dimension name)")]
@@ -292,6 +292,8 @@ impl<'a> Parser<'a> {
                     });
                 }
 
+                let mut parameter_span = self.peek().span.clone();
+
                 let mut parameters = vec![];
                 while self.match_exact(TokenKind::RightParen).is_none() {
                     if let Some(param_name) = self.match_exact(TokenKind::Identifier) {
@@ -309,6 +311,8 @@ impl<'a> Parser<'a> {
                             is_variadic,
                         ));
 
+                        parameter_span = parameter_span.extend(&self.last().unwrap().span);
+
                         if self.match_exact(TokenKind::Comma).is_none()
                             && self.peek().kind != TokenKind::RightParen
                         {
@@ -325,14 +329,6 @@ impl<'a> Parser<'a> {
                     }
                 }
 
-                let fn_is_variadic = parameters.iter().any(|p| p.2);
-                if fn_is_variadic && parameters.len() > 1 {
-                    return Err(ParseError {
-                        kind: ParseErrorKind::OnlySingleVariadicParameter,
-                        span: self.peek().span.clone(),
-                    });
-                }
-
                 let optional_return_type_dexpr = if self.match_exact(TokenKind::Arrow).is_some() {
                     // Parse return type
                     Some(self.dimension_expression()?)
@@ -340,6 +336,14 @@ impl<'a> Parser<'a> {
                     None
                 };
 
+                let fn_is_variadic = parameters.iter().any(|p| p.2);
+                if fn_is_variadic && parameters.len() > 1 {
+                    return Err(ParseError {
+                        kind: ParseErrorKind::OnlySingleVariadicParameter,
+                        span: parameter_span,
+                    });
+                }
+
                 let body = if self.match_exact(TokenKind::Equal).is_none() {
                     None
                 } else {
@@ -349,7 +353,7 @@ impl<'a> Parser<'a> {
                 if fn_is_variadic && body.is_some() {
                     return Err(ParseError {
                         kind: ParseErrorKind::VariadicParameterOnlyAllowedInForeignFunction,
-                        span: self.peek().span.clone(),
+                        span: parameter_span,
                     });
                 }
 
@@ -496,7 +500,7 @@ impl<'a> Parser<'a> {
         } else {
             Err(ParseError::new(
                 ParseErrorKind::CanOnlyCallIdentifier,
-                self.peek().span.clone(),
+                self.peek().span.clone(), // TODO: Ideally, this span should point to whatever we try to call. Once we have spans in the AST, this should be easy to resolve.
             ))
         }
     }

+ 2 - 2
numbat/src/resolver.rs

@@ -82,10 +82,10 @@ impl Resolver {
     fn parse(&self, code: &str, code_source_index: usize) -> Result<Vec<Statement>> {
         parse(code).map_err(|inner| {
             let diagnostic = Diagnostic::error()
-                .with_message("Parse error")
+                .with_message("while parsing")
                 .with_labels(vec![Label::primary(
                     code_source_index,
-                    (inner.span.position.byte)..(inner.span.position.byte + 1),
+                    (inner.span.start.byte)..(inner.span.end.byte),
                 )
                 .with_message(inner.kind.to_string())]);
             ResolverError::ParseError { inner, diagnostic }

+ 13 - 3
numbat/src/span.rs

@@ -1,4 +1,4 @@
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct SourceCodePositition {
     pub byte: usize,
     pub index: usize,
@@ -18,12 +18,22 @@ impl SourceCodePositition {
 
     pub fn to_single_character_span(&self) -> Span {
         Span {
-            position: self.clone(),
+            start: self.clone(),
+            end: self.clone(),
         }
     }
 }
 
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct Span {
-    pub position: SourceCodePositition,
+    pub start: SourceCodePositition,
+    pub end: SourceCodePositition,
+}
+impl Span {
+    pub fn extend(&self, other: &Span) -> Span {
+        Span {
+            start: self.start,
+            end: other.end,
+        }
+    }
 }

+ 15 - 6
numbat/src/tokenizer.rs

@@ -103,6 +103,7 @@ pub struct Token {
 struct Tokenizer {
     input: Vec<char>,
     current: SourceCodePositition,
+    last: SourceCodePositition,
     token_start: SourceCodePositition,
 }
 
@@ -126,6 +127,7 @@ impl Tokenizer {
         Tokenizer {
             input: input.chars().collect(),
             current: SourceCodePositition::start(),
+            last: SourceCodePositition::start(),
             token_start: SourceCodePositition::start(),
         }
     }
@@ -336,10 +338,13 @@ impl Tokenizer {
                 if self.match_char('"') {
                     TokenKind::String
                 } else {
-                    return tokenizer_error(
-                        &self.token_start,
-                        TokenizerErrorKind::UnterminatedString,
-                    );
+                    return Err(TokenizerError {
+                        kind: TokenizerErrorKind::UnterminatedString,
+                        span: Span {
+                            start: self.token_start,
+                            end: self.last,
+                        },
+                    });
                 }
             }
             '…' => TokenKind::Ellipsis,
@@ -372,7 +377,10 @@ impl Tokenizer {
         let token = Some(Token {
             kind,
             lexeme: self.lexeme(),
-            span: self.token_start.to_single_character_span(),
+            span: Span {
+                start: self.token_start,
+                end: self.current,
+            },
         });
 
         if kind == TokenKind::Newline {
@@ -391,6 +399,7 @@ impl Tokenizer {
 
     fn advance(&mut self) -> char {
         let c = self.input[self.current.index];
+        self.last = self.current;
         self.current.index += 1;
         self.current.byte += c.len_utf8();
         self.current.position += 1;
@@ -433,7 +442,7 @@ fn tokenize_reduced(input: &str) -> Vec<(String, TokenKind, (usize, usize))> {
             (
                 token.lexeme.to_string(),
                 token.kind,
-                (token.span.position.line, token.span.position.position),
+                (token.span.start.line, token.span.start.position),
             )
         })
         .collect()