Browse Source

Add support for Rust-style format specifiers in string interpolations

Fixes #264.
Mohammed Anas 1 year ago
parent
commit
b070b880c8

+ 7 - 0
Cargo.lock

@@ -888,6 +888,7 @@ dependencies = [
  "pretty_dtoa",
  "rand",
  "rust-embed",
+ "strfmt",
  "strsim 0.11.0",
  "termcolor",
  "thiserror",
@@ -1435,6 +1436,12 @@ version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
 
+[[package]]
+name = "strfmt"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a8348af2d9fc3258c8733b8d9d8db2e56f54b2363a4b5b81585c7875ed65e65"
+
 [[package]]
 name = "strsim"
 version = "0.10.0"

+ 1 - 0
book/src/example-numbat_syntax.md

@@ -103,6 +103,7 @@ print(2 kilowarhol)              # Print the value of an expression
 print("hello world")             # Print a message
 print("value of pi = {pi}")      # String interpolation
 print("sqrt(10) = {sqrt(10)}")   # Expressions in string interpolation
+print("value of π ≈ {π:.3}")     # Format specifiers
 
 assert(1 yard < 1 meter)         # Assertion
 

+ 9 - 0
book/src/procedures.md

@@ -26,6 +26,15 @@ let speed = 25 km/h
 print("Speed of the bicycle: {speed} ({speed -> mph})")
 ```
 
+Format specifiers are also supported in interpolations. For instance:
+
+```nbt
+print("{pi:0.2f}") # Prints "3.14"
+```
+
+For more information on supported format specifiers, please see the
+[Rust documentation on formatting parameters](https://doc.rust-lang.org/std/fmt/#formatting-parameters).
+
 ## Testing
 
 The `assert_eq` procedure can be used to test for (approximate) equality of two quantities.

+ 1 - 0
examples/numbat_syntax.nbt

@@ -98,6 +98,7 @@ print(2 kilowarhol)              # Print the value of an expression
 print("hello world")             # Print a message
 print("value of pi = {pi}")      # String interpolation
 print("sqrt(10) = {sqrt(10)}")   # Expressions in string interpolation
+print("value of π ≈ {π:.3}")     # Format specifiers
 
 assert(1 yard < 1 meter)         # Assertion
 

+ 1 - 0
numbat/Cargo.toml

@@ -35,6 +35,7 @@ iana-time-zone = "0.1"
 termcolor = { version = "1.4.1", optional = true }
 html-escape = { version = "0.2.13", optional = true }
 rand = "0.8.5"
+strfmt = "0.2.4"
 
 [features]
 default = ["fetch-exchangerates"]

+ 14 - 4
numbat/src/ast.rs

@@ -58,7 +58,11 @@ impl PrettyPrint for BinaryOperator {
 #[derive(Debug, Clone, PartialEq)]
 pub enum StringPart {
     Fixed(String),
-    Interpolation(Span, Box<Expression>),
+    Interpolation {
+        span: Span,
+        expr: Box<Expression>,
+        format_specifiers: Option<String>,
+    },
 }
 
 #[derive(Debug, Clone, PartialEq)]
@@ -426,9 +430,15 @@ impl ReplaceSpans for StringPart {
     fn replace_spans(&self) -> Self {
         match self {
             f @ StringPart::Fixed(_) => f.clone(),
-            StringPart::Interpolation(_, expr) => {
-                StringPart::Interpolation(Span::dummy(), Box::new(expr.replace_spans()))
-            }
+            StringPart::Interpolation {
+                expr,
+                format_specifiers,
+                span: _,
+            } => StringPart::Interpolation {
+                span: Span::dummy(),
+                expr: Box::new(expr.replace_spans()),
+                format_specifiers: format_specifiers.clone(),
+            },
         }
     }
 }

+ 9 - 1
numbat/src/bytecode_interpreter.rs

@@ -187,8 +187,16 @@ impl BytecodeInterpreter {
                             let index = self.vm.add_constant(Constant::String(s.clone()));
                             self.vm.add_op1(Op::LoadConstant, index)
                         }
-                        StringPart::Interpolation(_, expr) => {
+                        StringPart::Interpolation {
+                            expr,
+                            span: _,
+                            format_specifiers,
+                        } => {
                             self.compile_expression_with_simplify(expr)?;
+                            let index = self.vm.add_constant(Constant::FormatSpecifiers(
+                                format_specifiers.clone(),
+                            ));
+                            self.vm.add_op1(Op::LoadConstant, index)
                         }
                     }
                 }

+ 6 - 0
numbat/src/interpreter.rs

@@ -47,6 +47,12 @@ pub enum RuntimeError {
     DateTimeOutOfRange,
     #[error("Error in datetime format. See https://docs.rs/chrono/latest/chrono/format/strftime/index.html for possible format specifiers.")]
     DateFormattingError,
+
+    #[error("Invalid format specifiers: {0}")]
+    InvalidFormatSpecifiers(String),
+
+    #[error("Incorrect type for format specifiers: {0}")]
+    InvalidTypeForFormatSpecifiers(String),
 }
 
 #[derive(Debug, PartialEq, Eq)]

+ 57 - 21
numbat/src/parser.rs

@@ -1140,10 +1140,9 @@ impl<'a> Parser<'a> {
                 vec![StringPart::Fixed(strip_first_and_last(&token.lexeme))],
             ))
         } else if let Some(token) = self.match_exact(TokenKind::StringInterpolationStart) {
-            let mut parts = vec![StringPart::Fixed(strip_first_and_last(&token.lexeme))];
+            let mut parts = Vec::new();
 
-            let expr = self.expression()?;
-            parts.push(StringPart::Interpolation(expr.full_span(), Box::new(expr)));
+            self.interpolation(&mut parts, &token)?;
 
             let mut span_full_string = token.span;
             let mut has_end = false;
@@ -1154,10 +1153,7 @@ impl<'a> Parser<'a> {
                 span_full_string = span_full_string.extend(&inner_token.span);
                 match inner_token.kind {
                     TokenKind::StringInterpolationMiddle => {
-                        parts.push(StringPart::Fixed(strip_first_and_last(&inner_token.lexeme)));
-
-                        let expr = self.expression()?;
-                        parts.push(StringPart::Interpolation(expr.full_span(), Box::new(expr)));
+                        self.interpolation(&mut parts, &inner_token)?;
                     }
                     TokenKind::StringInterpolationEnd => {
                         parts.push(StringPart::Fixed(strip_first_and_last(&inner_token.lexeme)));
@@ -1206,6 +1202,24 @@ impl<'a> Parser<'a> {
         }
     }
 
+    fn interpolation(&mut self, parts: &mut Vec<StringPart>, token: &Token) -> Result<()> {
+        parts.push(StringPart::Fixed(strip_first_and_last(&token.lexeme)));
+
+        let expr = self.expression()?;
+
+        let format_specifiers = self
+            .match_exact(TokenKind::StringInterpolationSpecifiers)
+            .map(|token| token.lexeme.clone());
+
+        parts.push(StringPart::Interpolation {
+            span: expr.full_span(),
+            expr: Box::new(expr),
+            format_specifiers,
+        });
+
+        Ok(())
+    }
+
     /// Returns true iff the upcoming token indicates the beginning of a 'power'
     /// expression (which needs to start with a 'primary' expression).
     fn next_token_could_start_power_expression(&self) -> bool {
@@ -2485,7 +2499,11 @@ mod tests {
                 Span::dummy(),
                 vec![
                     StringPart::Fixed("pi = ".into()),
-                    StringPart::Interpolation(Span::dummy(), Box::new(identifier!("pi"))),
+                    StringPart::Interpolation {
+                        span: Span::dummy(),
+                        expr: Box::new(identifier!("pi")),
+                        format_specifiers: None,
+                    },
                 ],
             ),
         );
@@ -2494,10 +2512,11 @@ mod tests {
             &["\"{pi}\""],
             Expression::String(
                 Span::dummy(),
-                vec![StringPart::Interpolation(
-                    Span::dummy(),
-                    Box::new(identifier!("pi")),
-                )],
+                vec![StringPart::Interpolation {
+                    span: Span::dummy(),
+                    expr: Box::new(identifier!("pi")),
+                    format_specifiers: None,
+                }],
             ),
         );
 
@@ -2506,8 +2525,16 @@ mod tests {
             Expression::String(
                 Span::dummy(),
                 vec![
-                    StringPart::Interpolation(Span::dummy(), Box::new(identifier!("pi"))),
-                    StringPart::Interpolation(Span::dummy(), Box::new(identifier!("e"))),
+                    StringPart::Interpolation {
+                        span: Span::dummy(),
+                        expr: Box::new(identifier!("pi")),
+                        format_specifiers: None,
+                    },
+                    StringPart::Interpolation {
+                        span: Span::dummy(),
+                        expr: Box::new(identifier!("e")),
+                        format_specifiers: None,
+                    },
                 ],
             ),
         );
@@ -2517,23 +2544,32 @@ mod tests {
             Expression::String(
                 Span::dummy(),
                 vec![
-                    StringPart::Interpolation(Span::dummy(), Box::new(identifier!("pi"))),
+                    StringPart::Interpolation {
+                        span: Span::dummy(),
+                        expr: Box::new(identifier!("pi")),
+                        format_specifiers: None,
+                    },
                     StringPart::Fixed(" + ".into()),
-                    StringPart::Interpolation(Span::dummy(), Box::new(identifier!("e"))),
+                    StringPart::Interpolation {
+                        span: Span::dummy(),
+                        expr: Box::new(identifier!("e")),
+                        format_specifiers: None,
+                    },
                 ],
             ),
         );
 
         parse_as_expression(
-            &["\"1 + 2 = {1 + 2}\""],
+            &["\"1 + 2 = {1 + 2:0.2}\""],
             Expression::String(
                 Span::dummy(),
                 vec![
                     StringPart::Fixed("1 + 2 = ".into()),
-                    StringPart::Interpolation(
-                        Span::dummy(),
-                        Box::new(binop!(scalar!(1.0), Add, scalar!(2.0))),
-                    ),
+                    StringPart::Interpolation {
+                        span: Span::dummy(),
+                        expr: Box::new(binop!(scalar!(1.0), Add, scalar!(2.0))),
+                        format_specifiers: Some(":0.2".to_string()),
+                    },
                 ],
             ),
         );

+ 8 - 3
numbat/src/prefix_transformer.rs

@@ -85,10 +85,15 @@ impl Transformer {
                     .into_iter()
                     .map(|p| match p {
                         f @ StringPart::Fixed(_) => f,
-                        StringPart::Interpolation(span, expr) => StringPart::Interpolation(
+                        StringPart::Interpolation {
                             span,
-                            Box::new(self.transform_expression(*expr)),
-                        ),
+                            expr,
+                            format_specifiers,
+                        } => StringPart::Interpolation {
+                            span,
+                            expr: Box::new(self.transform_expression(*expr)),
+                            format_specifiers,
+                        },
                     })
                     .collect(),
             ),

+ 32 - 2
numbat/src/tokenizer.rs

@@ -123,6 +123,8 @@ pub enum TokenKind {
     StringInterpolationStart,
     // A part of a string between two interpolations: `}, and bar = {`
     StringInterpolationMiddle,
+    // Format specifiers for an interpolation, e.g. `:.03f`
+    StringInterpolationSpecifiers,
     // A part of a string which ends an interpolation: `}."`
     StringInterpolationEnd,
 
@@ -488,8 +490,6 @@ impl Tokenizer {
             '⩵' => TokenKind::EqualEqual,
             '=' if self.match_char('=') => TokenKind::EqualEqual,
             '=' => TokenKind::Equal,
-            ':' if self.match_char(':') => TokenKind::DoubleColon,
-            ':' => TokenKind::Colon,
             '@' => TokenKind::At,
             '→' | '➞' => TokenKind::Arrow,
             '-' if self.match_char('>') => TokenKind::Arrow,
@@ -548,6 +548,34 @@ impl Tokenizer {
                     });
                 }
             },
+            ':' if self.interpolation_state.is_inside() => {
+                while self.peek().map(|c| c != '"' && c != '}').unwrap_or(false) {
+                    self.advance();
+                }
+
+                if self.peek() == Some('"') {
+                    return Err(TokenizerError {
+                        kind: TokenizerErrorKind::UnterminatedStringInterpolation,
+                        span: Span {
+                            start: self.token_start,
+                            end: self.current,
+                            code_source_id: self.code_source_id,
+                        },
+                    });
+                }
+                if self.peek() == Some('}') {
+                    TokenKind::StringInterpolationSpecifiers
+                } else {
+                    return Err(TokenizerError {
+                        kind: TokenizerErrorKind::UnterminatedString,
+                        span: Span {
+                            start: self.token_start,
+                            end: self.current,
+                            code_source_id: self.code_source_id,
+                        },
+                    });
+                }
+            }
             '}' if self.interpolation_state.is_inside() => {
                 while self.peek().map(|c| c != '"' && c != '{').unwrap_or(false) {
                     self.advance();
@@ -595,6 +623,8 @@ impl Tokenizer {
                     TokenKind::Identifier
                 }
             }
+            ':' if self.match_char(':') => TokenKind::DoubleColon,
+            ':' => TokenKind::Colon,
             c => {
                 return tokenizer_error(
                     &self.token_start,

+ 9 - 6
numbat/src/typechecker.rs

@@ -1022,12 +1022,15 @@ impl TypeChecker {
                     .iter()
                     .map(|p| match p {
                         StringPart::Fixed(s) => Ok(typed_ast::StringPart::Fixed(s.clone())),
-                        StringPart::Interpolation(span, expr) => {
-                            Ok(typed_ast::StringPart::Interpolation(
-                                *span,
-                                Box::new(self.check_expression(expr)?),
-                            ))
-                        }
+                        StringPart::Interpolation {
+                            span,
+                            expr,
+                            format_specifiers,
+                        } => Ok(typed_ast::StringPart::Interpolation {
+                            span: *span,
+                            format_specifiers: format_specifiers.clone(),
+                            expr: Box::new(self.check_expression(expr)?),
+                        }),
                     })
                     .collect::<Result<_>>()?,
             ),

+ 19 - 3
numbat/src/typed_ast.rs

@@ -129,15 +129,31 @@ impl Type {
 #[derive(Debug, Clone, PartialEq)]
 pub enum StringPart {
     Fixed(String),
-    Interpolation(Span, Box<Expression>),
+    Interpolation {
+        span: Span,
+        expr: Box<Expression>,
+        format_specifiers: Option<String>,
+    },
 }
 
 impl PrettyPrint for StringPart {
     fn pretty_print(&self) -> Markup {
         match self {
             StringPart::Fixed(s) => s.pretty_print(),
-            StringPart::Interpolation(_, expr) => {
-                m::operator("{") + expr.pretty_print() + m::operator("}")
+            StringPart::Interpolation {
+                span: _,
+                expr,
+                format_specifiers,
+            } => {
+                let mut markup = m::operator("{") + expr.pretty_print();
+
+                if let Some(format_specifiers) = format_specifiers {
+                    markup += m::text(format_specifiers);
+                }
+
+                markup += m::operator("}");
+
+                markup
             }
         }
     }

+ 4 - 0
numbat/src/value.rs

@@ -28,6 +28,7 @@ pub enum Value {
     /// A DateTime with an associated offset used when pretty printing
     DateTime(chrono::DateTime<chrono::FixedOffset>),
     FunctionReference(FunctionReference),
+    FormatSpecifiers(Option<String>),
 }
 
 impl Value {
@@ -85,6 +86,7 @@ impl std::fmt::Display for Value {
             Value::String(s) => write!(f, "\"{}\"", s),
             Value::DateTime(dt) => write!(f, "datetime(\"{}\")", dt),
             Value::FunctionReference(r) => write!(f, "{}", r),
+            Value::FormatSpecifiers(_) => write!(f, "<format specfiers>"),
         }
     }
 }
@@ -97,6 +99,8 @@ impl PrettyPrint for Value {
             Value::String(s) => s.pretty_print(),
             Value::DateTime(dt) => crate::markup::string(crate::datetime::to_rfc2822_save(dt)),
             Value::FunctionReference(r) => crate::markup::string(r.to_string()),
+            Value::FormatSpecifiers(Some(s)) => crate::markup::string(s.to_string()),
+            Value::FormatSpecifiers(None) => crate::markup::empty(),
         }
     }
 }

+ 49 - 5
numbat/src/vm.rs

@@ -1,3 +1,4 @@
+use std::collections::HashMap;
 use std::{cmp::Ordering, fmt::Display};
 
 use crate::{
@@ -194,6 +195,7 @@ pub enum Constant {
     Boolean(bool),
     String(String),
     FunctionReference(FunctionReference),
+    FormatSpecifiers(Option<String>),
 }
 
 impl Constant {
@@ -204,6 +206,7 @@ impl Constant {
             Constant::Boolean(b) => Value::Boolean(*b),
             Constant::String(s) => Value::String(s.clone()),
             Constant::FunctionReference(inner) => Value::FunctionReference(inner.clone()),
+            Constant::FormatSpecifiers(s) => Value::FormatSpecifiers(s.clone()),
         }
     }
 }
@@ -216,6 +219,7 @@ impl Display for Constant {
             Constant::Boolean(val) => write!(f, "{}", val),
             Constant::String(val) => write!(f, "\"{}\"", val),
             Constant::FunctionReference(inner) => write!(f, "{}", inner),
+            Constant::FormatSpecifiers(_) => write!(f, "<format specfiers>"),
         }
     }
 }
@@ -861,13 +865,53 @@ impl Vm {
                 Op::JoinString => {
                     let num_parts = self.read_u16() as usize;
                     let mut joined = String::new();
+                    let to_str = |value| match value {
+                        Value::Quantity(q) => q.to_string(),
+                        Value::Boolean(b) => b.to_string(),
+                        Value::String(s) => s,
+                        Value::DateTime(dt) => crate::datetime::to_rfc2822_save(&dt),
+                        Value::FunctionReference(r) => r.to_string(),
+                        Value::FormatSpecifiers(_) => unreachable!(),
+                    };
+
+                    let map_strfmt_error_to_runtime_error = |err| match err {
+                        strfmt::FmtError::Invalid(s) => RuntimeError::InvalidFormatSpecifiers(s),
+                        strfmt::FmtError::TypeError(s) => {
+                            RuntimeError::InvalidTypeForFormatSpecifiers(s)
+                        }
+                        strfmt::FmtError::KeyError(_) => unreachable!(),
+                    };
+
                     for _ in 0..num_parts {
                         let part = match self.pop() {
-                            Value::Quantity(q) => q.to_string(),
-                            Value::Boolean(b) => b.to_string(),
-                            Value::String(s) => s,
-                            Value::DateTime(dt) => crate::datetime::to_rfc2822_save(&dt),
-                            Value::FunctionReference(r) => r.to_string(),
+                            Value::FormatSpecifiers(Some(specifiers)) => match self.pop() {
+                                Value::Quantity(q) => {
+                                    let mut vars = HashMap::new();
+                                    vars.insert("value".to_string(), q.unsafe_value().to_f64());
+
+                                    let mut str =
+                                        strfmt::strfmt(&format!("{{value{}}}", specifiers), &vars)
+                                            .map_err(map_strfmt_error_to_runtime_error)?;
+
+                                    let unit_str = q.unit().to_string();
+
+                                    if !unit_str.is_empty() {
+                                        str += " ";
+                                        str += &unit_str;
+                                    }
+
+                                    str
+                                }
+                                value => {
+                                    let mut vars = HashMap::new();
+                                    vars.insert("value".to_string(), to_str(value));
+
+                                    strfmt::strfmt(&format!("{{value{}}}", specifiers), &vars)
+                                        .map_err(map_strfmt_error_to_runtime_error)?
+                                }
+                            },
+                            Value::FormatSpecifiers(None) => to_str(self.pop()),
+                            v => to_str(v),
                         };
                         joined = part + &joined; // reverse order
                     }

+ 45 - 0
numbat/tests/interpreter.rs

@@ -544,6 +544,51 @@ fn test_conditionals() {
 fn test_string_interpolation() {
     expect_output("\"pi = {pi}!\"", "pi = 3.14159!");
     expect_output("\"1 + 2 = {1 + 2}\"", "1 + 2 = 3");
+
+    expect_output("\"{0.2:0.5}\"", "0.20000");
+    expect_output("\"pi ~= {pi:.3}\"", "pi ~= 3.142");
+    expect_output(
+        "\"both {pi:.3} and {e} are irrational and transcendental numbers\"",
+        "both 3.142 and 2.71828 are irrational and transcendental numbers",
+    );
+    expect_output(
+        "
+        let str = \"1234\"
+        \"{str:0.2}\"
+        ",
+        "12",
+    );
+
+    expect_output("\"{1_000_300:+.3}\"", "+1000300.000");
+
+    expect_output(
+        "
+        let str = \"1234\"
+        \"a {str:^10} b\"
+        ",
+        "a    1234    b",
+    );
+
+    // Doesn't work at the moment, as `strfmt` expects `i64`'s for `#x`, but Numbat deals with `f64`'s
+    // internally
+    //expect_output("\"{31:#x}\"", "0x1f")
+
+    expect_failure(
+        "\"{200:x}\"",
+        "Incorrect type for format specifiers: Unknown format code 'x' for type",
+    );
+    expect_failure(
+        "\"{200:.}\"",
+        "Invalid format specifiers: Format specifier missing precision",
+    );
+
+    expect_failure(
+        "
+        let str = \"1234\"
+        \"{str:.3f}\"
+        ",
+        "Incorrect type for format specifiers: Unknown format code Some('f') for object of type 'str'",
+    );
 }
 
 #[test]