From 2b694f3b1b7d95030c3328b380b841f563b8722f Mon Sep 17 00:00:00 2001 From: "Petter T." Date: Wed, 22 Nov 2023 17:29:39 +0100 Subject: [PATCH] Bytecode interpreter: Populate cells on the heap (bug #64914) Previously, cells in the bytecode interpreter were being populated on the stack instead of on the heap. This led to repeatable crashes with cell concatenations like `b = {b{:} a{:}}`. This patch moves such cells to the heap so that large cell concatenations are made possible. [Commit message summary and text by Arun Giridhar] * pt-bytecode-vm.cc: Rework how cells are pushed. PUSH_CELL_BIG, New opcode APPEND_CELL, New opcode PUSH_OV_U64, EXPAND_CS_LIST removed * pt-bytecode-walk.cc: Use new opcodes * pt-bytecode.h: New opcodes, delete redundant * test/compile/bytecode_cell.m: Add more tests for cells --- libinterp/parse-tree/pt-bytecode-vm.cc | 311 +++++++++++++++-------- libinterp/parse-tree/pt-bytecode-walk.cc | 87 +++++-- libinterp/parse-tree/pt-bytecode.h | 4 +- test/compile/bytecode_cell.m | 67 +++++ 4 files changed, 345 insertions(+), 124 deletions(-) diff --git a/libinterp/parse-tree/pt-bytecode-vm.cc b/libinterp/parse-tree/pt-bytecode-vm.cc index ddd63ae0cd..0d14721cf5 100644 --- a/libinterp/parse-tree/pt-bytecode-vm.cc +++ b/libinterp/parse-tree/pt-bytecode-vm.cc @@ -48,6 +48,7 @@ #include "ov-ref.h" #include "ov-range.h" #include "ov-inline.h" +#include "ov-cell.h" #include "ov-vm.h" #include "ov-fcn-handle.h" @@ -278,9 +279,6 @@ octave::opcodes_to_strings (std::vector &v_code, std::vector &v_code, std::vector{}); - } - DISPATCH_1BYTEOP (); -expand_cs_list: - { - octave_value cs = TOP_OV (); - octave_value cols_ov = SEC_OV (); // n columns - octave_value rows_ov = THIRD_OV (); // n rows + DISPATCH (); - if (cs.is_cs_list ()) - { - STACK_DESTROY (3); +append_cell: +{ + // The stack looks like this: + // top: Element to add + // -1: Row counter + // -2: Column counter + // -3: The cell to add elements to + + // Essentially there is an APPEND_CELL opcode after + // each element argument. The last APPEND_CELL in a row + // has arg0 set to a number to indicate if it is the last + // column in the row, with distinction between: + // + // a middle row == 1 + // the last row of many == 2 + // the last row of one == 3 + // the last row of many == 4 + // + // This is needed since the first row sets how many columns the + // other rows need to have and the last rows need to pop the two + // counters on the stack. - octave_value_list tmp_ovl = cs.list_value (); + // Note that 'b = {}; c = {b{:}, b{:}}" makes c size 2x0 + // while 'b = {}; c = {b{:}}" makes c size 0x0 - for (octave_idx_type i = 0; i < tmp_ovl.length (); i++) - { - PUSH_OV (tmp_ovl (i)); - cols_ov.non_const_unary_op (octave_value::unary_op::op_incr); - } - } - else - { - STACK_DESTROY (3); + int last = arg0; - cols_ov.non_const_unary_op (octave_value::unary_op::op_incr); + // The element we need to insert into the cell + octave_value ov = std::move (TOP_OV ()); + STACK_SHRINK (1); - PUSH_OV (cs); - } + // The cell we are adding the element to + octave_value &ov_cell = THIRD_OV (); + octave_cell &ovb_cell = REP (octave_cell, ov_cell); - PUSH_OV (rows_ov); - PUSH_OV (cols_ov); - } - DISPATCH_1BYTEOP (); + Cell &cell = ovb_cell.octave_cell::matrix_ref (); + + octave_idx_type n_rows = cell.rows (); + octave_idx_type n_cols = cell.cols (); + + // The column counter + octave_value &ov_i_col = SEC_OV (); + octave_int64_scalar &ovb_i_col = REP (octave_int64_scalar, ov_i_col); + auto &i_col = ovb_i_col.octave_int64_scalar::scalar_ref (); + + octave_idx_type i_col_idx = i_col; + + // The row counter + octave_value &ov_i_row = TOP_OV (); + octave_int64_scalar &ovb_i_row = REP (octave_int64_scalar, ov_i_row); + auto &i_row = ovb_i_row.octave_int64_scalar::scalar_ref (); + + octave_idx_type i_row_idx = i_row; + + if (ov.is_cs_list ()) + { + octave_value_list ovl = ov.list_value (); + octave_idx_type n = ovl.length (); + + // If we are operating on the first row, increase its size if we + // are about to overflow. + if (i_row_idx == 0 && i_col_idx + n > n_cols) + { + cell.resize (dim_vector (n_rows, i_col_idx + n)); + n_cols = i_col_idx + n; + } + + // If there is room in the row, insert the elements into it. + // Note that if there is no room, no element will be added to the cell, + // there will be an error after the row's last element's arg is executed. + // I.e. all the arg expressions in the row are always executed before + // the error. + if (i_col_idx + n <= n_cols) + { + // Insert the elements of the cs-list into to the row of the cell. + for (octave_idx_type i = 0; i < n; i++) + cell (i_row_idx, i_col_idx + i) = ovl (i); + } + + i_col += n; + i_col_idx += n; + } + else if (ov.is_defined ()) + { + // If we are operating on the first row, increase its size if we + // are about to overflow. + if (i_row_idx == 0 && i_col_idx >= n_cols) + { + cell.resize (dim_vector (1, i_col_idx + 1)); + n_cols++; + } + + // If there is room in the row, insert the element into it. + // Note that if there is no room, no element will be added to the cell, + // there will be an error after the row's last element's arg is executed. + // I.e. all the arg expressions in the row are always executed before + // the error. + if (i_col_idx < n_cols) + cell (i_row_idx, i_col_idx) = ov; + + i_col = i_col + 1L; + i_col_idx++; + } + else + { + ; // If the arg is undefined, nothing is added to the row in the cell. + } + + if (last == 1) // Last element in a middle row in a cell with multiple rows. + { + // The amount of columns in a row has to match the first row's. + if (i_col_idx && i_col_idx != n_cols) + { + (*sp++).pee = new execution_exception {"error","","number of columns must match"}; + (*sp++).i = static_cast (error_type::EXECUTION_EXC); + goto unwind; + } + + // Prepare for APPEND_CELL to operate on the next row. + i_row += i_col_idx ? 1L : 0; // Only advance row idx if something was inserted. + i_col = 0; + } + else if (last == 2) // Last element in the last row in a cell with multiple rows. + { + // The amount of columns in a row has to match the first row's unless + // the amount of columns in the current row is zero. + if (i_col_idx && i_col_idx != n_cols) + { + (*sp++).pee = new execution_exception {"error","","number of columns must match"}; + (*sp++).i = static_cast (error_type::EXECUTION_EXC); + goto unwind; + } + + if (i_col_idx) + i_row_idx += 1; // If this row was not empty + else if (n_cols == 0) + i_row_idx += 1; // If this row was empty and is supposed to be empty. + + // If all the args for a row were empty, the next row's args were inserted into the empty row, + // so there might be trailing empty rows that we need to remove. + if (i_row_idx != n_rows) + { + cell.resize (dim_vector (i_row_idx, n_cols)); + } + + // Destroy the col and row counters + STACK_DESTROY (2); + // The cell is now on the top of the stack + } + else if (last == 3) // Last element in row in a cell with one row + { + // If a smaller number of columns were inserted than there are arguments + // (the amounts of args sets the initial size) we need to shrink the cell + if (i_col_idx < n_cols) + { + // If the row is empty, the resulting cell should be 0x0. + // "b = {}; c = {b{:}}" yields c as a 0x0 cell + // but: + // "b = {}; c = {b{:}; b{:}}" yields c as a 2x0 cell + cell.resize (dim_vector (i_col_idx ? 1 : 0, i_col_idx)); + } + + // Destroy the col and row counters + STACK_DESTROY (2); + // The cell is now on the top of the stack + } + else if (last == 4) // Last element in the first row, more than one row total + { + // If a smaller number of columns were inserted than there are arguments + // (the amounts of args sets the initial size) we need to shrink the cell + if (i_col_idx < n_cols) + { + cell.resize (dim_vector (n_rows, i_col_idx)); + } + + // Prepare for APPEND_CELL to operate on the next row + i_col = 0; + // Always advance to next row, even if first row was empty since + // if the first row was empty, all rows need to be empty. + i_row += 1L; + } +} +DISPATCH (); { // TODO: Too much code. Should be broken out? diff --git a/libinterp/parse-tree/pt-bytecode-walk.cc b/libinterp/parse-tree/pt-bytecode-walk.cc index 6b0764744b..53dbe9c6eb 100644 --- a/libinterp/parse-tree/pt-bytecode-walk.cc +++ b/libinterp/parse-tree/pt-bytecode-walk.cc @@ -4300,24 +4300,53 @@ visit_cell (tree_cell &m) { INC_DEPTH (); m_unknown_nargout++; + + octave_idx_type n_cols = 0; + octave_idx_type n_rows = 0; + // Count the amount of rows and columns for an initial guess on the size + // of the cell. auto p = m.begin (); - int n_cols = -1; + while (p != m.end ()) + { + // This is a row + tree_argument_list *elt = *p++; + n_rows++; - PUSH_CODE (INSTR::PUSH_OV_U64); // number of rows + CHECK_NONNULL (elt); - // Push each row element to operand stack + octave_idx_type n_cols_this_row = 0; + for (auto it = elt->begin (); it != elt->end (); it++) + n_cols_this_row++; + + if (n_cols_this_row > n_cols) + n_cols = n_cols_this_row; + } + + if (n_cols < 256 && n_rows < 256) + { + PUSH_CODE (INSTR::PUSH_CELL); + PUSH_CODE (n_rows); + PUSH_CODE (n_cols); + } + else + { + PUSH_CODE (INSTR::PUSH_CELL_BIG); + PUSH_CODE_INT (n_rows); + PUSH_CODE_INT (n_cols); + } + + // Code to push each row arg to operand stack, with a APPEND_CELL after it. + p = m.begin (); + octave_idx_type row_i = 0; while (p != m.end ()) { // This is a row tree_argument_list *elt = *p++; - PUSH_CODE (INSTR::PUSH_OV_U64); //number of columns - - int n_cols_old = n_cols; - n_cols = 0; CHECK_NONNULL (elt); - for (auto it = elt->begin (); it != elt->end (); it++) + octave_idx_type n_cols_this_row = 0; + for (auto it = elt->begin (); it != elt->end (); /* ++it in if bellow */) { // This is an element tree_expression *e = *it; @@ -4328,25 +4357,39 @@ visit_cell (tree_cell &m) e->accept (*this); DEC_DEPTH (); POP_NARGOUT (); - n_cols++; - // We now need to expand the value (if it is an cs list) - // and rotate the counters to the top of the stack. - // - // Expand cslist does that in one opcode. - PUSH_CODE (INSTR::EXPAND_CS_LIST); + n_cols_this_row++; + + // The last APPEND_CELL in a row need special markers + if (++it != elt->end ()) // Not last? + { + PUSH_CODE (INSTR::APPEND_CELL); + PUSH_CODE (0); // 0 => Not last APPEND_CELL in row + } } - if (n_cols > n_cols_old) - n_cols_old = n_cols; + // If there are no args in the row, e.g. 'a = {b;;}', the APPEND_CELL still need something + // to grab on the stack, that will not be added to the row. + if (n_cols_this_row == 0) + PUSH_CODE (INSTR::PUSH_NIL); // Dummy value - // The amount of rows is on the second position of the stack, - // rotate it with the amount of columns and increment the rows. - PUSH_CODE (INSTR::ROT); - PUSH_CODE (INSTR::INCR_PREFIX); - } + // The APPEND_CELL opcode inserts element into a cell put on the stack by PUSH_CELL. + // The opcode after APPEND_CELL tells it whether the APPEND_CELL is the last in a row. + PUSH_CODE (INSTR::APPEND_CELL); + if (p == m.end ()) // Is this the last row? + { + if (n_rows == 1) + PUSH_CODE (3); // Last column in last row, only one row total + else + PUSH_CODE (2); // Last column in last row, more than one row total + } + else if (row_i == 0) + PUSH_CODE (4); // Last column in first row, more than one row total + else + PUSH_CODE (1); // Last column in row, more than one row total - PUSH_CODE (INSTR::PUSH_CELL); + row_i++; + } maybe_emit_bind_ans_and_disp (m); diff --git a/libinterp/parse-tree/pt-bytecode.h b/libinterp/parse-tree/pt-bytecode.h index 1fb78d9ab9..db76b5ad6f 100644 --- a/libinterp/parse-tree/pt-bytecode.h +++ b/libinterp/parse-tree/pt-bytecode.h @@ -106,8 +106,6 @@ enum class INSTR WORDCMD, HANDLE_SIGNALS, PUSH_CELL, - PUSH_OV_U64, - EXPAND_CS_LIST, INDEX_CELL_ID_NARGOUT0, INDEX_CELL_ID_NARGOUT1, INDEX_CELL_ID_NARGOUTN, @@ -219,6 +217,8 @@ enum class INSTR PUSH_I, PUSH_E, INDEX_STRUCT_SUBCALL, + PUSH_CELL_BIG, + APPEND_CELL, }; enum class unwind_entry_type diff --git a/test/compile/bytecode_cell.m b/test/compile/bytecode_cell.m index ed51faee20..5b1847ff38 100644 --- a/test/compile/bytecode_cell.m +++ b/test/compile/bytecode_cell.m @@ -58,6 +58,73 @@ function bytecode_cell () % Command form function call subref __printf_assert__ ("%d ", suby{:}); + + % Test making cells dynamically with unpacking of cells + a = {1,2}; + b = {}; + d = {11; 12}; + + c = {a{:}}; + assert (c, {1, 2}); + + c = {d{:}}; + assert (c, {11, 12}); + + c = {a{:}, 3, 4}; + assert (c, {1, 2, 3, 4}); + + c = {b{:}, a{:}, 3, 4, b{:}}; + assert (c, {1, 2, 3, 4}); + + c = {;;; a{:}; 3 4;;;; b{:}}; + assert (c, {1, 2; 3, 4}); + + c = {b{:}}; + assert (c, {}); + + c = {b{:}; b{:}}; + assert (c, cell (2, 0)); + + threw = false; + try + c = {b{:}; 1 2}; + catch e + assert (regexp (e.message, "number of columns must match")) + threw = true; + end + + assert (threw) + + threw = false; + try + c = {1 2 3; a{:}}; + catch e + assert (regexp (e.message, "number of columns must match")) + threw = true; + end + + assert (threw) + + threw = false; + try + c = {1 2 3; a{:}; 4 5 6}; + catch e + assert (regexp (e.message, "number of columns must match")) + threw = true; + end + + assert (threw) + + threw = false; + try + c = {a{:}; 1 2 3}; + catch e + assert (regexp (e.message, "number of columns must match")) + threw = true; + end + + assert (threw) + end function a = suby()