浏览代码

Add string interpolation ("f-strings")

closes #111
David Peter 2 年之前
父节点
当前提交
b393b6af56

+ 1 - 0
numbat-cli/src/ansi_formatter.rs

@@ -13,6 +13,7 @@ impl Formatter for ANSIFormatter {
             FormatType::Whitespace => text.normal(),
             FormatType::Dimmed => text.dimmed(),
             FormatType::Text => text.normal(),
+            FormatType::String => text.green(),
             FormatType::Keyword => text.magenta(),
             FormatType::Value => text.yellow(),
             FormatType::Unit => text.cyan(),

+ 40 - 2
numbat/src/ast.rs

@@ -49,6 +49,29 @@ impl PrettyPrint for BinaryOperator {
     }
 }
 
+#[derive(Debug, Clone, PartialEq)]
+pub enum StringPart {
+    Fixed(String),
+    Interpolation(Span, String),
+}
+
+impl PrettyPrint for StringPart {
+    fn pretty_print(&self) -> Markup {
+        match self {
+            StringPart::Fixed(s) => s.pretty_print(),
+            StringPart::Interpolation(_, identifier) => {
+                m::operator("{") + m::identifier(identifier) + m::operator("}")
+            }
+        }
+    }
+}
+
+impl PrettyPrint for &Vec<StringPart> {
+    fn pretty_print(&self) -> Markup {
+        m::operator("\"") + self.iter().map(|p| p.pretty_print()).sum() + m::operator("\"")
+    }
+}
+
 #[derive(Debug, Clone, PartialEq)]
 pub enum Expression {
     Scalar(Span, Number),
@@ -67,7 +90,7 @@ pub enum Expression {
     },
     FunctionCall(Span, Span, String, Vec<Expression>),
     Boolean(Span, bool),
-    String(Span, String),
+    String(Span, Vec<StringPart>),
     Condition(Span, Box<Expression>, Box<Expression>, Box<Expression>),
 }
 
@@ -350,6 +373,18 @@ impl ReplaceSpans for DimensionExpression {
     }
 }
 
+#[cfg(test)]
+impl ReplaceSpans for StringPart {
+    fn replace_spans(&self) -> Self {
+        match self {
+            f @ StringPart::Fixed(_) => f.clone(),
+            StringPart::Interpolation(_, identifier) => {
+                StringPart::Interpolation(Span::dummy(), identifier.clone())
+            }
+        }
+    }
+}
+
 #[cfg(test)]
 impl ReplaceSpans for Expression {
     fn replace_spans(&self) -> Self {
@@ -392,7 +427,10 @@ impl ReplaceSpans for Expression {
                 Box::new(then.replace_spans()),
                 Box::new(else_.replace_spans()),
             ),
-            Expression::String(_, string) => Expression::String(Span::dummy(), string.clone()),
+            Expression::String(_, parts) => Expression::String(
+                Span::dummy(),
+                parts.iter().map(|p| p.replace_spans()).collect(),
+            ),
         }
     }
 }

+ 24 - 10
numbat/src/bytecode_interpreter.rs

@@ -1,6 +1,6 @@
 use std::collections::HashMap;
 
-use crate::ast::ProcedureKind;
+use crate::ast::{ProcedureKind, StringPart};
 use crate::interpreter::{
     Interpreter, InterpreterResult, InterpreterSettings, Result, RuntimeError,
 };
@@ -21,6 +21,15 @@ pub struct BytecodeInterpreter {
 }
 
 impl BytecodeInterpreter {
+    fn compile_load_identifier(&mut self, identifier: &str) {
+        if let Some(position) = self.local_variables.iter().position(|n| n == identifier) {
+            self.vm.add_op1(Op::GetLocal, position as u16); // TODO: check overflow
+        } else {
+            let identifier_idx = self.vm.add_global_identifier(identifier, None);
+            self.vm.add_op1(Op::GetVariable, identifier_idx);
+        }
+    }
+
     fn compile_expression(&mut self, expr: &Expression) -> Result<()> {
         match expr {
             Expression::Scalar(_span, n) => {
@@ -28,12 +37,7 @@ impl BytecodeInterpreter {
                 self.vm.add_op1(Op::LoadConstant, index);
             }
             Expression::Identifier(_span, identifier, _type) => {
-                if let Some(position) = self.local_variables.iter().position(|n| n == identifier) {
-                    self.vm.add_op1(Op::GetLocal, position as u16); // TODO: check overflow
-                } else {
-                    let identifier_idx = self.vm.add_global_identifier(identifier, None);
-                    self.vm.add_op1(Op::GetVariable, identifier_idx);
-                }
+                self.compile_load_identifier(identifier);
             }
             Expression::UnitIdentifier(_span, prefix, unit_name, _full_name, _type) => {
                 let index = self
@@ -95,9 +99,19 @@ impl BytecodeInterpreter {
                 let index = self.vm.add_constant(Constant::Boolean(*val));
                 self.vm.add_op1(Op::LoadConstant, index);
             }
-            Expression::String(_, string) => {
-                let index = self.vm.add_constant(Constant::String(string.clone()));
-                self.vm.add_op1(Op::LoadConstant, index)
+            Expression::String(_, string_parts) => {
+                for part in string_parts {
+                    match part {
+                        StringPart::Fixed(s) => {
+                            let index = self.vm.add_constant(Constant::String(s.clone()));
+                            self.vm.add_op1(Op::LoadConstant, index)
+                        }
+                        StringPart::Interpolation(_, identifier) => {
+                            self.compile_load_identifier(identifier);
+                        }
+                    }
+                }
+                self.vm.add_op1(Op::JoinString, string_parts.len() as u16); // TODO: this can overflow
             }
             Expression::Condition(_, condition, then_expr, else_expr) => {
                 self.compile_expression(condition)?;

+ 9 - 0
numbat/src/markup.rs

@@ -5,6 +5,7 @@ pub enum FormatType {
     Whitespace,
     Dimmed,
     Text,
+    String,
     Keyword,
     Value,
     Unit,
@@ -90,6 +91,14 @@ pub fn text(text: impl AsRef<str>) -> Markup {
     ))
 }
 
+pub fn string(text: impl AsRef<str>) -> Markup {
+    Markup::from(FormattedString(
+        OutputType::Normal,
+        FormatType::String,
+        text.as_ref().to_string(),
+    ))
+}
+
 pub fn keyword(text: impl AsRef<str>) -> Markup {
     Markup::from(FormattedString(
         OutputType::Normal,

+ 146 - 4
numbat/src/parser.rs

@@ -29,14 +29,14 @@
 
 use crate::arithmetic::{Exponent, Rational};
 use crate::ast::{
-    BinaryOperator, DimensionExpression, Expression, ProcedureKind, Statement, TypeAnnotation,
-    UnaryOperator,
+    BinaryOperator, DimensionExpression, Expression, ProcedureKind, Statement, StringPart,
+    TypeAnnotation, UnaryOperator,
 };
 use crate::decorator::Decorator;
 use crate::number::Number;
 use crate::prefix_parser::AcceptsPrefix;
 use crate::resolver::ModulePath;
-use crate::span::Span;
+use crate::span::{SourceCodePositition, Span};
 use crate::tokenizer::{Token, TokenKind, TokenizerError, TokenizerErrorKind};
 
 use num_traits::{CheckedDiv, FromPrimitive, Zero};
@@ -154,6 +154,9 @@ pub enum ParseErrorKind {
 
     #[error("Expected 'else' in if-then-else condition")]
     ExpectedElse,
+
+    #[error("Unfinished string-interpolation field")]
+    UnfinishedStringInterpolationField,
 }
 
 #[derive(Debug, Clone, Error)]
@@ -1022,7 +1025,11 @@ impl<'a> Parser<'a> {
         } else if let Some(token) = self.match_exact(TokenKind::String) {
             Ok(Expression::String(
                 token.span,
-                token.lexeme.trim_matches('"').to_string(),
+                parse_string_interpolation(
+                    token.span.start,
+                    token.span.code_source_id,
+                    &token.lexeme,
+                )?,
             ))
         } else if self.match_exact(TokenKind::LeftParen).is_some() {
             let inner = self.expression()?;
@@ -1260,6 +1267,91 @@ impl<'a> Parser<'a> {
     }
 }
 
+fn parse_string_interpolation(
+    pos: SourceCodePositition,
+    code_source_id: usize,
+    string: &str,
+) -> Result<Vec<StringPart>> {
+    let mut parts = vec![];
+    let mut pos = pos;
+
+    let mut last_pos = pos;
+
+    let mut chars = string.chars();
+    let mut advance = || -> Option<(char, SourceCodePositition)> {
+        if let Some(c) = chars.next() {
+            pos.byte += c.len_utf8() as u32;
+            pos.position += 1;
+            Some((c, pos))
+        } else {
+            None
+        }
+    };
+
+    let mut lexeme = String::new();
+    let (_, mut lexeme_start) = advance().unwrap(); // Skip the initial quote for the beginning of the string
+
+    let mut in_fixed_mode = true;
+
+    loop {
+        if let Some((c, current_pos)) = advance() {
+            if in_fixed_mode {
+                if c == '"' {
+                    parts.push(StringPart::Fixed(lexeme.clone()));
+                    break;
+                } else if c == '{' {
+                    parts.push(StringPart::Fixed(lexeme.clone()));
+
+                    lexeme_start = current_pos;
+                    lexeme.clear();
+
+                    in_fixed_mode = false;
+                } else {
+                    lexeme.push(c);
+                }
+            } else {
+                if c == '}' {
+                    let span = Span {
+                        start: lexeme_start,
+                        end: last_pos,
+                        code_source_id,
+                    };
+
+                    parts.push(StringPart::Interpolation(span, lexeme.clone()));
+
+                    lexeme_start = current_pos;
+                    lexeme.clear();
+
+                    in_fixed_mode = true;
+                } else if c == '"' {
+                    let span = Span {
+                        start: lexeme_start,
+                        end: last_pos,
+                        code_source_id,
+                    };
+                    return Err(ParseError {
+                        kind: ParseErrorKind::UnfinishedStringInterpolationField,
+                        span,
+                    });
+                } else {
+                    lexeme.push(c);
+                }
+            }
+
+            last_pos = current_pos;
+        } else {
+            break;
+        }
+    }
+
+    parts = parts
+        .into_iter()
+        .filter(|p| !matches!(p, StringPart::Fixed(s) if s.is_empty()))
+        .collect();
+
+    Ok(parts)
+}
+
 pub fn parse(input: &str, code_source_id: usize) -> Result<Vec<Statement>> {
     use crate::tokenizer::tokenize;
 
@@ -2093,4 +2185,54 @@ mod tests {
         should_fail_with(&["if true 1 else 2"], ParseErrorKind::ExpectedThen);
         should_fail_with(&["if true then 1"], ParseErrorKind::ExpectedElse);
     }
+
+    #[test]
+    fn string_interpolation() {
+        parse_as_expression(
+            &["\"pi = {pi}\""],
+            Expression::String(
+                Span::dummy(),
+                vec![
+                    StringPart::Fixed("pi = ".into()),
+                    StringPart::Interpolation(Span::dummy(), "pi".into()),
+                ],
+            ),
+        );
+
+        parse_as_expression(
+            &["\"{pi}\""],
+            Expression::String(
+                Span::dummy(),
+                vec![StringPart::Interpolation(Span::dummy(), "pi".into())],
+            ),
+        );
+
+        parse_as_expression(
+            &["\"{pi}{e}\""],
+            Expression::String(
+                Span::dummy(),
+                vec![
+                    StringPart::Interpolation(Span::dummy(), "pi".into()),
+                    StringPart::Interpolation(Span::dummy(), "e".into()),
+                ],
+            ),
+        );
+
+        parse_as_expression(
+            &["\"{pi} + {e}\""],
+            Expression::String(
+                Span::dummy(),
+                vec![
+                    StringPart::Interpolation(Span::dummy(), "pi".into()),
+                    StringPart::Fixed(" + ".into()),
+                    StringPart::Interpolation(Span::dummy(), "e".into()),
+                ],
+            ),
+        );
+
+        should_fail_with(
+            &["\"pi = {pi\"", "\"pi = {\"", "\"pi = {pi}, e = {e\""],
+            ParseErrorKind::UnfinishedStringInterpolationField,
+        );
+    }
 }

+ 1 - 1
numbat/src/pretty_print.rs

@@ -12,6 +12,6 @@ impl PrettyPrint for bool {
 
 impl PrettyPrint for String {
     fn pretty_print(&self) -> Markup {
-        crate::markup::text(format!("\"{self}\""))
+        crate::markup::string(self)
     }
 }

+ 1 - 1
numbat/src/quantity.rs

@@ -340,7 +340,7 @@ impl std::fmt::Display for Quantity {
 
         let markup = self.pretty_print();
         let formatter = PlainTextFormatter {};
-        write!(f, "{}", formatter.format(&markup, false))
+        write!(f, "{}", formatter.format(&markup, false).trim())
     }
 }
 

+ 8 - 1
numbat/src/tokenizer.rs

@@ -629,6 +629,13 @@ fn test_tokenize_basic() {
         ]
     );
 
+    assert!(tokenize("~", 0).is_err());
+}
+
+#[test]
+fn test_tokenize_string() {
+    use TokenKind::*;
+
     assert_eq!(
         tokenize_reduced("\"foo\""),
         [
@@ -637,7 +644,7 @@ fn test_tokenize_basic() {
         ]
     );
 
-    assert!(tokenize("~", 0).is_err());
+    assert!(tokenize("\"foo", 0).is_err());
 }
 
 #[test]

+ 60 - 18
numbat/src/typechecker.rs

@@ -4,13 +4,13 @@ use std::{
     fmt,
 };
 
-use crate::span::Span;
 use crate::typed_ast::{self, Type};
 use crate::{
     arithmetic::{pretty_exponent, Exponent, Power, Rational},
     ast::ProcedureKind,
 };
 use crate::{ast, decorator, ffi, suggestion};
+use crate::{ast::StringPart, span::Span};
 use crate::{
     ast::TypeAnnotation,
     registry::{BaseRepresentation, BaseRepresentationFactor, RegistryError},
@@ -358,9 +358,15 @@ fn evaluate_const_expr(expr: &typed_ast::Expression) -> Result<Exponent> {
     }
 }
 
+#[derive(Clone, PartialEq)]
+enum IdentifierKind {
+    Variable,
+    Other,
+}
+
 #[derive(Clone, Default)]
 pub struct TypeChecker {
-    identifiers: HashMap<String, Type>,
+    identifiers: HashMap<String, (Type, IdentifierKind)>,
     function_signatures: HashMap<
         String,
         (
@@ -375,23 +381,27 @@ pub struct TypeChecker {
 }
 
 impl TypeChecker {
-    fn type_for_identifier(&self, span: Span, name: &str) -> Result<&Type> {
+    fn identifier_type_and_kind(&self, span: Span, name: &str) -> Result<&(Type, IdentifierKind)> {
         self.identifiers.get(name).ok_or_else(|| {
             let suggestion = suggestion::did_you_mean(self.identifiers.keys(), name);
             TypeCheckError::UnknownIdentifier(span, name.into(), suggestion)
         })
     }
 
+    fn identifier_type(&self, span: Span, name: &str) -> Result<&Type> {
+        self.identifier_type_and_kind(span, name).map(|(t, _)| t)
+    }
+
     pub(crate) fn check_expression(&self, ast: &ast::Expression) -> Result<typed_ast::Expression> {
         Ok(match ast {
             ast::Expression::Scalar(span, n) => typed_ast::Expression::Scalar(*span, *n),
             ast::Expression::Identifier(span, name) => {
-                let type_ = self.type_for_identifier(*span, name)?.clone();
+                let type_ = self.identifier_type(*span, name)?.clone();
 
                 typed_ast::Expression::Identifier(*span, name.clone(), type_)
             }
             ast::Expression::UnitIdentifier(span, prefix, name, full_name) => {
-                let type_ = self.type_for_identifier(*span, name)?.clone();
+                let type_ = self.identifier_type(*span, name)?.clone();
 
                 typed_ast::Expression::UnitIdentifier(
                     *span,
@@ -711,8 +721,22 @@ impl TypeChecker {
                 )
             }
             ast::Expression::Boolean(span, val) => typed_ast::Expression::Boolean(*span, *val),
-            ast::Expression::String(span, string) => {
-                typed_ast::Expression::String(*span, string.clone())
+            ast::Expression::String(span, string_parts) => {
+                for part in string_parts {
+                    if let StringPart::Interpolation(span, identifier) = part {
+                        let (_, kind) = self.identifier_type_and_kind(*span, identifier)?; // Make sure identifier exists
+                        if kind != &IdentifierKind::Variable {
+                            // Only variables can be interpolated so far; units are rejected
+                            return Err(TypeCheckError::UnknownIdentifier(
+                                *span,
+                                identifier.clone(),
+                                None,
+                            ));
+                        }
+                    }
+                }
+
+                typed_ast::Expression::String(*span, string_parts.clone())
             }
             ast::Expression::Condition(span, condition, then, else_) => {
                 let condition = self.check_expression(condition)?;
@@ -751,8 +775,10 @@ impl TypeChecker {
             ast::Statement::Expression(expr) => {
                 let checked_expr = self.check_expression(expr)?;
                 for &identifier in LAST_RESULT_IDENTIFIERS {
-                    self.identifiers
-                        .insert(identifier.into(), checked_expr.get_type());
+                    self.identifiers.insert(
+                        identifier.into(),
+                        (checked_expr.get_type(), IdentifierKind::Variable),
+                    );
                 }
                 typed_ast::Statement::Expression(checked_expr)
             }
@@ -792,8 +818,13 @@ impl TypeChecker {
                     }
                 }
 
-                self.identifiers
-                    .insert(identifier.clone(), Type::Dimension(type_deduced.clone()));
+                self.identifiers.insert(
+                    identifier.clone(),
+                    (
+                        Type::Dimension(type_deduced.clone()),
+                        IdentifierKind::Variable,
+                    ),
+                );
 
                 typed_ast::Statement::DefineVariable(
                     identifier.clone(),
@@ -820,8 +851,13 @@ impl TypeChecker {
                         .map_err(TypeCheckError::RegistryError)?
                 };
                 for (name, _) in decorator::name_and_aliases(unit_name, decorators) {
-                    self.identifiers
-                        .insert(name.clone(), Type::Dimension(type_specified.clone()));
+                    self.identifiers.insert(
+                        name.clone(),
+                        (
+                            Type::Dimension(type_specified.clone()),
+                            IdentifierKind::Other,
+                        ),
+                    );
                 }
                 typed_ast::Statement::DefineBaseUnit(
                     unit_name.clone(),
@@ -873,8 +909,10 @@ impl TypeChecker {
                     }
                 }
                 for (name, _) in decorator::name_and_aliases(identifier, decorators) {
-                    self.identifiers
-                        .insert(name.clone(), Type::Dimension(type_deduced.clone()));
+                    self.identifiers.insert(
+                        name.clone(),
+                        (Type::Dimension(type_deduced.clone()), IdentifierKind::Other),
+                    );
                 }
                 typed_ast::Statement::DefineDerivedUnit(
                     identifier.clone(),
@@ -948,9 +986,13 @@ impl TypeChecker {
                             .map_err(TypeCheckError::RegistryError)?
                     };
 
-                    typechecker_fn
-                        .identifiers
-                        .insert(parameter.clone(), Type::Dimension(parameter_type.clone()));
+                    typechecker_fn.identifiers.insert(
+                        parameter.clone(),
+                        (
+                            Type::Dimension(parameter_type.clone()),
+                            IdentifierKind::Variable,
+                        ),
+                    );
                     typed_parameters.push((
                         *parameter_span,
                         parameter.clone(),

+ 3 - 3
numbat/src/typed_ast.rs

@@ -1,7 +1,7 @@
 use itertools::Itertools;
 
-use crate::ast::ProcedureKind;
 pub use crate::ast::{BinaryOperator, DimensionExpression, UnaryOperator};
+use crate::ast::{ProcedureKind, StringPart};
 use crate::dimension::DimensionRegistry;
 use crate::markup as m;
 use crate::{
@@ -84,7 +84,7 @@ pub enum Expression {
     FunctionCall(Span, Span, String, Vec<Expression>, Type),
     Boolean(Span, bool),
     Condition(Span, Box<Expression>, Box<Expression>, Box<Expression>),
-    String(Span, String),
+    String(Span, Vec<StringPart>),
 }
 
 impl Expression {
@@ -492,7 +492,7 @@ impl PrettyPrint for Expression {
                     + m::operator(")")
             }
             Boolean(_, val) => val.pretty_print(),
-            String(_, string) => string.pretty_print(),
+            String(_, parts) => parts.pretty_print(),
             Condition(_, condition, then, else_) => {
                 m::keyword("if")
                     + m::space()

+ 18 - 0
numbat/src/vm.rs

@@ -82,6 +82,9 @@ pub enum Op {
     /// Print a compile-time string
     PrintString,
 
+    /// Join the top N values on the stack into a single string, used by string interpolation
+    JoinString,
+
     /// Perform a simplification operation to the current value on the stack
     FullSimplify,
 
@@ -99,6 +102,7 @@ impl Op {
             | Op::GetVariable
             | Op::GetLocal
             | Op::PrintString
+            | Op::JoinString
             | Op::JumpIfFalse
             | Op::Jump => 1,
             Op::Negate
@@ -148,6 +152,7 @@ impl Op {
             Op::FFICallFunction => "FFICallFunction",
             Op::FFICallProcedure => "FFICallProcedure",
             Op::PrintString => "PrintString",
+            Op::JoinString => "JoinString",
             Op::FullSimplify => "FullSimplify",
             Op::Return => "Return",
         }
@@ -684,6 +689,19 @@ impl Vm {
                     let s = &self.strings[s_idx];
                     self.print(ctx, s);
                 }
+                Op::JoinString => {
+                    let num_parts = self.read_u16() as usize;
+                    let mut joined = String::new();
+                    for _ in 0..num_parts {
+                        let part = match self.pop() {
+                            Value::Quantity(q) => q.to_string(),
+                            Value::Boolean(b) => b.to_string(),
+                            Value::String(s) => s,
+                        };
+                        joined = part + &joined; // reverse order
+                    }
+                    self.push(Value::String(joined))
+                }
                 Op::FullSimplify => match self.pop() {
                     Value::Quantity(q) => {
                         let simplified = q.full_simplify();

+ 10 - 0
numbat/tests/interpreter.rs

@@ -357,4 +357,14 @@ fn test_comparisons() {
 fn test_conditionals() {
     expect_output("if 1 < 2 then 3 else 4", "3");
     expect_output("if 4 < 3 then 2 else 1", "1");
+    expect_output(
+        "if 4 > 3 then \"four is larger!\" else \"four is not larger!\"",
+        "four is larger!",
+    );
+}
+
+#[test]
+fn test_string_interpolation() {
+    expect_output("\"pi = {pi}!\"", "pi = 3.14159!");
+    expect_output("if 4 < 3 then 2 else 1", "1");
 }