Forráskód Böngészése

Store the spans linked to each instructions in the bytecode

Tamo 1 hete
szülő
commit
fc98ead94a
3 módosított fájl, 135 hozzáadás és 79 törlés
  1. 80 47
      numbat/src/bytecode_interpreter.rs
  2. 17 0
      numbat/src/span.rs
  3. 38 32
      numbat/src/vm.rs

+ 80 - 47
numbat/src/bytecode_interpreter.rs

@@ -50,11 +50,11 @@ pub struct BytecodeInterpreter {
 impl BytecodeInterpreter {
     fn compile_expression(&mut self, expr: &Expression) {
         match expr {
-            Expression::Scalar(_span, n, _type) => {
+            Expression::Scalar(span, n, _type) => {
                 let index = self.vm.add_constant(Constant::Scalar(n.to_f64()));
-                self.vm.add_op1(Op::LoadConstant, index);
+                self.vm.add_op1(Op::LoadConstant, index, *span);
             }
-            Expression::Identifier(_span, identifier, _type) => {
+            Expression::Identifier(span, identifier, _type) => {
                 // Searching in reverse order ensures that we find the innermost identifier of that name first (shadowing)
 
                 let current_depth = self.locals.len() - 1;
@@ -63,14 +63,15 @@ impl BytecodeInterpreter {
                     .iter()
                     .rposition(|l| l.identifiers.iter().any(|n| n == identifier))
                 {
-                    self.vm.add_op1(Op::GetLocal, position as u16); // TODO: check overflow
+                    self.vm.add_op1(Op::GetLocal, position as u16, *span); // TODO: check overflow
                 } else if let Some(upvalue_position) = self.locals[0]
                     .iter()
                     .rposition(|l| l.identifiers.iter().any(|n| n == identifier))
                 {
-                    self.vm.add_op1(Op::GetUpvalue, upvalue_position as u16);
+                    self.vm
+                        .add_op1(Op::GetUpvalue, upvalue_position as u16, *span);
                 } else if LAST_RESULT_IDENTIFIERS.contains(identifier) {
-                    self.vm.add_op(Op::GetLastResult);
+                    self.vm.add_op(Op::GetLastResult, *span);
                 } else if let Some(is_foreign) = self.functions.get(*identifier) {
                     let index = self
                         .vm
@@ -79,37 +80,37 @@ impl BytecodeInterpreter {
                         } else {
                             FunctionReference::Normal(identifier.to_compact_string())
                         }));
-                    self.vm.add_op1(Op::LoadConstant, index);
+                    self.vm.add_op1(Op::LoadConstant, index, *span);
                 } else {
                     unreachable!("Unknown identifier '{identifier}'")
                 }
             }
-            Expression::UnitIdentifier(_span, prefix, unit_name, _full_name, _type) => {
+            Expression::UnitIdentifier(span, prefix, unit_name, _full_name, _type) => {
                 let index = self
                     .unit_name_to_constant_index
                     .get(unit_name)
                     .expect("unit should already exist");
 
-                self.vm.add_op1(Op::LoadConstant, *index);
+                self.vm.add_op1(Op::LoadConstant, *index, *span);
 
                 if prefix != &Prefix::none() {
                     let prefix_idx = self.vm.add_prefix(*prefix);
-                    self.vm.add_op1(Op::ApplyPrefix, prefix_idx);
+                    self.vm.add_op1(Op::ApplyPrefix, prefix_idx, *span);
                 }
             }
-            Expression::UnaryOperator(_span, UnaryOperator::Negate, rhs, _type) => {
+            Expression::UnaryOperator(span, UnaryOperator::Negate, rhs, _type) => {
                 self.compile_expression(rhs);
-                self.vm.add_op(Op::Negate);
+                self.vm.add_op(Op::Negate, *span);
             }
-            Expression::UnaryOperator(_span, UnaryOperator::Factorial(order), lhs, _type) => {
+            Expression::UnaryOperator(span, UnaryOperator::Factorial(order), lhs, _type) => {
                 self.compile_expression(lhs);
-                self.vm.add_op1(Op::Factorial, order.get() as u16);
+                self.vm.add_op1(Op::Factorial, order.get() as u16, *span);
             }
-            Expression::UnaryOperator(_span, UnaryOperator::LogicalNeg, lhs, _type) => {
+            Expression::UnaryOperator(span, UnaryOperator::LogicalNeg, lhs, _type) => {
                 self.compile_expression(lhs);
-                self.vm.add_op(Op::LogicalNeg);
+                self.vm.add_op(Op::LogicalNeg, *span);
             }
-            Expression::BinaryOperator(_span, operator, lhs, rhs, _type) => {
+            Expression::BinaryOperator(span, operator, lhs, rhs, _type) => {
                 self.compile_expression(lhs);
                 self.compile_expression(rhs);
 
@@ -129,9 +130,14 @@ impl BytecodeInterpreter {
                     BinaryOperator::LogicalAnd => Op::LogicalAnd,
                     BinaryOperator::LogicalOr => Op::LogicalOr,
                 };
-                self.vm.add_op(op);
+                self.vm.add_op(
+                    op,
+                    span.unwrap_or_else(|| {
+                        crate::span::Span::in_between(lhs.full_span(), rhs.full_span())
+                    }),
+                );
             }
-            Expression::BinaryOperatorForDate(_span, operator, lhs, rhs, type_) => {
+            Expression::BinaryOperatorForDate(span, operator, lhs, rhs, type_) => {
                 self.compile_expression(lhs);
                 self.compile_expression(rhs);
 
@@ -144,7 +150,13 @@ impl BytecodeInterpreter {
                     // this specific purpose. We also need to handle errors in case no such unit
                     // was registered.
                     let second_idx = self.unit_name_to_constant_index.get("second");
-                    self.vm.add_op1(Op::LoadConstant, *second_idx.unwrap());
+                    self.vm.add_op1(
+                        Op::LoadConstant,
+                        *second_idx.unwrap(),
+                        span.unwrap_or_else(|| {
+                            crate::span::Span::in_between(lhs.full_span(), rhs.full_span())
+                        }),
+                    );
                     Op::DiffDateTime
                 } else {
                     match operator {
@@ -154,9 +166,14 @@ impl BytecodeInterpreter {
                     }
                 };
 
-                self.vm.add_op(op);
+                self.vm.add_op(
+                    op,
+                    span.unwrap_or_else(|| {
+                        crate::span::Span::in_between(lhs.full_span(), rhs.full_span())
+                    }),
+                );
             }
-            Expression::FunctionCall(_span, _full_span, name, args, _type) => {
+            Expression::FunctionCall(_span, full_span, name, args, _type) => {
                 // Put all arguments on top of the stack
                 for arg in args {
                     self.compile_expression(arg);
@@ -164,14 +181,16 @@ impl BytecodeInterpreter {
 
                 if let Some(idx) = self.vm.get_ffi_callable_idx(name) {
                     // TODO: check overflow:
-                    self.vm.add_op2(Op::FFICallFunction, idx, args.len() as u16);
+                    self.vm
+                        .add_op2(Op::FFICallFunction, idx, args.len() as u16, *full_span);
                 } else {
                     let idx = self.vm.get_function_idx(name);
 
-                    self.vm.add_op2(Op::Call, idx, args.len() as u16); // TODO: check overflow
+                    self.vm
+                        .add_op2(Op::Call, idx, args.len() as u16, *full_span); // TODO: check overflow
                 }
             }
-            Expression::InstantiateStruct(_span, exprs, struct_info) => {
+            Expression::InstantiateStruct(span, exprs, struct_info) => {
                 // structs must be consistently ordered in the VM, so we reorder
                 // the field values so that they are evaluated in the order the
                 // struct fields are defined.
@@ -186,10 +205,14 @@ impl BytecodeInterpreter {
 
                 let struct_info_idx = self.vm.get_structinfo_idx(&struct_info.name).unwrap() as u16;
 
-                self.vm
-                    .add_op2(Op::BuildStructInstance, struct_info_idx, exprs.len() as u16);
+                self.vm.add_op2(
+                    Op::BuildStructInstance,
+                    struct_info_idx,
+                    exprs.len() as u16,
+                    *span,
+                );
             }
-            Expression::AccessField(_span, _full_span, expr, attr, struct_type, _result_type) => {
+            Expression::AccessField(_span, full_span, expr, attr, struct_type, _result_type) => {
                 self.compile_expression(expr);
 
                 let Type::Struct(ref struct_info) = struct_type.to_concrete_type() else {
@@ -200,9 +223,10 @@ impl BytecodeInterpreter {
 
                 let idx = struct_info.fields.get_index_of(*attr).unwrap();
 
-                self.vm.add_op1(Op::AccessStructField, idx as u16);
+                self.vm
+                    .add_op1(Op::AccessStructField, idx as u16, *full_span);
             }
-            Expression::CallableCall(_span, callable, args, _type) => {
+            Expression::CallableCall(span, callable, args, _type) => {
                 // Put all arguments on top of the stack
                 for arg in args {
                     self.compile_expression(arg);
@@ -211,18 +235,18 @@ impl BytecodeInterpreter {
                 // Put the callable on top of the stack
                 self.compile_expression(callable);
 
-                self.vm.add_op1(Op::CallCallable, args.len() as u16);
+                self.vm.add_op1(Op::CallCallable, args.len() as u16, *span);
             }
-            Expression::Boolean(_, val) => {
+            Expression::Boolean(span, val) => {
                 let index = self.vm.add_constant(Constant::Boolean(*val));
-                self.vm.add_op1(Op::LoadConstant, index);
+                self.vm.add_op1(Op::LoadConstant, index, *span);
             }
-            Expression::String(_, string_parts) => {
+            Expression::String(span, string_parts) => {
                 for part in string_parts {
                     match part {
                         StringPart::Fixed(s) => {
                             let index = self.vm.add_constant(Constant::String(s.clone()));
-                            self.vm.add_op1(Op::LoadConstant, index)
+                            self.vm.add_op1(Op::LoadConstant, index, *span)
                         }
                         StringPart::Interpolation {
                             expr,
@@ -233,22 +257,23 @@ impl BytecodeInterpreter {
                             let index = self.vm.add_constant(Constant::FormatSpecifiers(
                                 format_specifiers.map(|s| s.to_compact_string()),
                             ));
-                            self.vm.add_op1(Op::LoadConstant, index)
+                            self.vm.add_op1(Op::LoadConstant, index, *span)
                         }
                     }
                 }
-                self.vm.add_op1(Op::JoinString, string_parts.len() as u16); // TODO: this can overflow
+                self.vm
+                    .add_op1(Op::JoinString, string_parts.len() as u16, *span); // TODO: this can overflow
             }
-            Expression::Condition(_, condition, then_expr, else_expr) => {
+            Expression::Condition(span, condition, then_expr, else_expr) => {
                 self.compile_expression(condition);
 
                 let if_jump_offset = self.vm.current_offset() + 1; // +1 for the opcode
-                self.vm.add_op1(Op::JumpIfFalse, 0xffff);
+                self.vm.add_op1(Op::JumpIfFalse, 0xffff, *span);
 
                 self.compile_expression(then_expr);
 
                 let else_jump_offset = self.vm.current_offset() + 1;
-                self.vm.add_op1(Op::Jump, 0xffff);
+                self.vm.add_op1(Op::Jump, 0xffff, *span);
 
                 let else_block_offset = self.vm.current_offset();
                 self.vm
@@ -261,12 +286,12 @@ impl BytecodeInterpreter {
                 self.vm
                     .patch_u16_value_at(else_jump_offset, end_offset - (else_jump_offset + 2));
             }
-            Expression::List(_, elements, _) => {
+            Expression::List(span, elements, _) => {
                 for element in elements {
                     self.compile_expression(element);
                 }
 
-                self.vm.add_op1(Op::BuildList, elements.len() as u16);
+                self.vm.add_op1(Op::BuildList, elements.len() as u16, *span);
             }
             Expression::TypedHole(_, _) => {
                 unreachable!("Typed holes cause type inference errors")
@@ -305,7 +330,7 @@ impl BytecodeInterpreter {
         match stmt {
             Statement::Expression(expr) => {
                 self.compile_expression(expr);
-                self.vm.add_op(Op::Return);
+                self.vm.add_op(Op::Return, expr.full_span());
             }
             Statement::DefineVariable(define_variable) => {
                 self.compile_define_variable(define_variable);
@@ -338,7 +363,7 @@ impl BytecodeInterpreter {
 
                 self.compile_expression(expr);
 
-                self.vm.add_op(Op::Return);
+                self.vm.add_op(Op::Return, expr.full_span());
 
                 self.locals.pop();
 
@@ -444,8 +469,13 @@ impl BytecodeInterpreter {
                 ); // TODO: there is some asymmetry here because we do not introduce identifiers for base units
 
                 self.compile_expression(expr);
-                self.vm
-                    .add_op2(Op::SetUnitConstant, unit_information_idx, constant_idx);
+                self.vm.add_op2(
+                    Op::SetUnitConstant,
+                    unit_information_idx,
+                    constant_idx,
+                    // TODO: TAMO: We should have the full span of the stmt here
+                    expr.full_span(),
+                );
 
                 // TODO: code duplication with DeclareBaseUnit branch above
                 for (name, _) in decorator::name_and_aliases(unit_name, decorators) {
@@ -461,7 +491,8 @@ impl BytecodeInterpreter {
                 let idx = self.vm.add_string(
                     m::dimmed("=") + m::whitespace(" ") + arg.get_type_scheme().pretty_print(), // TODO
                 );
-                self.vm.add_op1(Op::PrintString, idx);
+                // TODO: TAMO: We should have the full span of the stmt here
+                self.vm.add_op1(Op::PrintString, idx, arg.full_span());
             }
             Statement::ProcedureCall(kind, args) => {
                 // Put all arguments on top of the stack
@@ -481,6 +512,8 @@ impl BytecodeInterpreter {
                     callable_idx,
                     args.len() as u16,
                     spans_idx,
+                    // TODO: TAMO: We should have the full span of the stmt here
+                    args[0].full_span(),
                 );
                 // TODO: check overflow
             }

+ 17 - 0
numbat/src/span.rs

@@ -49,6 +49,23 @@ pub struct Span {
 }
 
 impl Span {
+    /// Returns a new span covering everything in-between left and right.
+    /// Should only be used when left and right comes from the same file.
+    /// If they're not we're going to:
+    /// - panic in debug mode
+    /// - return the code_source_id of the left part in release mode
+    #[track_caller]
+    #[inline]
+    pub fn in_between(left: Span, right: Span) -> Span {
+        debug_assert_eq!(left.code_source_id, right.code_source_id);
+        crate::span::Span {
+            start: left.end,
+            end: right.start,
+            // lhs and rhs should share the same code_source_id
+            code_source_id: left.code_source_id,
+        }
+    }
+
     pub fn extend(&self, other: &Span) -> Span {
         assert_eq!(self.code_source_id, other.code_source_id);
         Span {

+ 38 - 32
numbat/src/vm.rs

@@ -272,9 +272,9 @@ pub struct ExecutionContext<'a> {
 
 #[derive(Clone)]
 pub struct Vm {
-    /// The actual code of the program, structured by function name. The code
+    /// The actual code and spans of the program, structured by function name. The code
     /// for the global scope is at index 0 under the function name `<main>`.
-    bytecode: Vec<(CompactString, Vec<u8>)>,
+    bytecode: Vec<(CompactString, Vec<u8>, Vec<Span>)>,
 
     /// An index into the `bytecode` vector referring to the function which is
     /// currently being compiled.
@@ -322,7 +322,7 @@ pub struct Vm {
 impl Vm {
     pub fn new() -> Self {
         Self {
-            bytecode: vec![("<main>".into(), vec![])],
+            bytecode: vec![("<main>".into(), vec![], vec![])],
             current_chunk_index: 0,
             constants: vec![],
             struct_infos: IndexMap::new(),
@@ -347,8 +347,9 @@ impl Vm {
 
     // The following functions are helpers for the compilation process
 
-    fn current_chunk_mut(&mut self) -> &mut Vec<u8> {
-        &mut self.bytecode[self.current_chunk_index].1
+    fn current_chunk_mut(&mut self) -> (&mut Vec<u8>, &mut Vec<Span>) {
+        let current = &mut self.bytecode[self.current_chunk_index];
+        (&mut current.1, &mut current.2)
     }
 
     fn push_u16(chunk: &mut Vec<u8>, data: u16) {
@@ -357,29 +358,34 @@ impl Vm {
         chunk.push(arg_bytes[1]);
     }
 
-    pub fn add_op(&mut self, op: Op) {
-        self.current_chunk_mut().push(op as u8);
+    pub fn add_op(&mut self, op: Op, span: Span) {
+        let (bytecode, spans) = self.current_chunk_mut();
+        bytecode.push(op as u8);
+        spans.push(span);
     }
 
-    pub fn add_op1(&mut self, op: Op, arg: u16) {
-        let current_chunk = self.current_chunk_mut();
-        current_chunk.push(op as u8);
-        Self::push_u16(current_chunk, arg)
+    pub fn add_op1(&mut self, op: Op, arg: u16, span: Span) {
+        let (bytecode, spans) = self.current_chunk_mut();
+        bytecode.push(op as u8);
+        Self::push_u16(bytecode, arg);
+        spans.extend(std::iter::repeat_n(span, 3));
     }
 
-    pub(crate) fn add_op2(&mut self, op: Op, arg1: u16, arg2: u16) {
-        let current_chunk = self.current_chunk_mut();
-        current_chunk.push(op as u8);
-        Self::push_u16(current_chunk, arg1);
-        Self::push_u16(current_chunk, arg2);
+    pub(crate) fn add_op2(&mut self, op: Op, arg1: u16, arg2: u16, span: Span) {
+        let (bytecode, spans) = self.current_chunk_mut();
+        bytecode.push(op as u8);
+        Self::push_u16(bytecode, arg1);
+        Self::push_u16(bytecode, arg2);
+        spans.extend(std::iter::repeat_n(span, 5));
     }
 
-    pub(crate) fn add_op3(&mut self, op: Op, arg1: u16, arg2: u16, arg3: u16) {
-        let current_chunk = self.current_chunk_mut();
-        current_chunk.push(op as u8);
-        Self::push_u16(current_chunk, arg1);
-        Self::push_u16(current_chunk, arg2);
-        Self::push_u16(current_chunk, arg3);
+    pub(crate) fn add_op3(&mut self, op: Op, arg1: u16, arg2: u16, arg3: u16, span: Span) {
+        let (bytecode, spans) = self.current_chunk_mut();
+        bytecode.push(op as u8);
+        Self::push_u16(bytecode, arg1);
+        Self::push_u16(bytecode, arg2);
+        Self::push_u16(bytecode, arg3);
+        spans.extend(std::iter::repeat_n(span, 7));
     }
 
     pub fn current_offset(&self) -> u16 {
@@ -388,9 +394,9 @@ impl Vm {
 
     pub fn patch_u16_value_at(&mut self, offset: u16, arg: u16) {
         let offset = offset as usize;
-        let chunk = self.current_chunk_mut();
-        chunk[offset] = (arg & 0xff) as u8;
-        chunk[offset + 1] = ((arg >> 8) & 0xff) as u8;
+        let (bytecode, _spans) = self.current_chunk_mut();
+        bytecode[offset] = (arg & 0xff) as u8;
+        bytecode[offset + 1] = ((arg >> 8) & 0xff) as u8;
     }
 
     pub fn add_constant(&mut self, constant: Constant) -> u16 {
@@ -437,7 +443,7 @@ impl Vm {
     }
 
     pub(crate) fn begin_function(&mut self, name: &str) {
-        self.bytecode.push((name.into(), vec![]));
+        self.bytecode.push((name.into(), vec![], vec![]));
         self.current_chunk_index = self.bytecode.len() - 1
     }
 
@@ -453,7 +459,7 @@ impl Vm {
             .bytecode
             .iter()
             .rev()
-            .position(|(n, _)| n == name)
+            .position(|(n, _, _)| n == name)
             .unwrap();
         let position = self.bytecode.len() - 1 - rev_position;
         assert!(position <= u16::MAX as usize);
@@ -493,7 +499,7 @@ impl Vm {
         for (idx, identifier) in self.unit_information.iter().enumerate() {
             eprintln!("  {:04} {}", idx, identifier.0);
         }
-        for (idx, (function_name, bytecode)) in self.bytecode.iter().enumerate() {
+        for (idx, (function_name, bytecode, _spans)) in self.bytecode.iter().enumerate() {
             eprintln!(".CODE {idx} ({function_name})");
             let mut offset = 0;
             while offset < bytecode.len() {
@@ -1096,10 +1102,10 @@ fn vm_basic() {
     vm.add_constant(Constant::Scalar(42.0));
     vm.add_constant(Constant::Scalar(1.0));
 
-    vm.add_op1(Op::LoadConstant, 0);
-    vm.add_op1(Op::LoadConstant, 1);
-    vm.add_op(Op::Add);
-    vm.add_op(Op::Return);
+    vm.add_op1(Op::LoadConstant, 0, Span::dummy());
+    vm.add_op1(Op::LoadConstant, 1, Span::dummy());
+    vm.add_op(Op::Add, Span::dummy());
+    vm.add_op(Op::Return, Span::dummy());
 
     let mut print_fn = |_: &Markup| {};
     let mut ctx = ExecutionContext {