diff --git a/Orange/base.py b/Orange/base.py index f618a876f46..05919157b09 100644 --- a/Orange/base.py +++ b/Orange/base.py @@ -664,6 +664,13 @@ def __init__(self, cat_model, cat_features, domain): self.cat_model = cat_model self.cat_features = cat_features + def __call__(self, data, ret=Model.Value): + if isinstance(data, Table): + with data.force_unlocked(data.X): + return super().__call__(data, ret) + else: + return super().__call__(data, ret) + def predict(self, X): if self.cat_features: X = X.astype(str) @@ -824,17 +831,18 @@ def __call__(self, data, progress_callback=None): return m def fit_storage(self, data: Table): - domain, X, Y, W = data.domain, data.X, data.Y.reshape(-1), None - if self.supports_weights and data.has_weights(): - W = data.W.reshape(-1) - # pylint: disable=not-callable - clf = self.__wraps__(**self.params) - cat_features = [i for i, attr in enumerate(domain.attributes) - if attr.is_discrete] - if cat_features: - X = X.astype(str) - cat_model = clf.fit(X, Y, cat_features=cat_features, sample_weight=W) - return self.__returns__(cat_model, cat_features, domain) + with data.force_unlocked(data.X): + domain, X, Y, W = data.domain, data.X, data.Y.reshape(-1), None + if self.supports_weights and data.has_weights(): + W = data.W.reshape(-1) + # pylint: disable=not-callable + clf = self.__wraps__(**self.params) + cat_features = [i for i, attr in enumerate(domain.attributes) + if attr.is_discrete] + if cat_features: + X = X.astype(str) + cat_model = clf.fit(X, Y, cat_features=cat_features, sample_weight=W) + return self.__returns__(cat_model, cat_features, domain) def __getattr__(self, item): try: diff --git a/Orange/classification/_tree_scorers.pyx b/Orange/classification/_tree_scorers.pyx index 00bd0639ca0..cb3bcb8172f 100644 --- a/Orange/classification/_tree_scorers.pyx +++ b/Orange/classification/_tree_scorers.pyx @@ -17,7 +17,7 @@ cdef extern from "numpy/npy_math.h": cpdef enum: NULL_BRANCH = -1 -def contingency(double[:] x, int nx, double[:] y, int ny): +def contingency(const double[:] x, int nx, const double[:] y, int ny): cdef: np.ndarray[np.uint32_t, ndim=2] cont = np.zeros((ny, nx), dtype=np.uint32) int n = len(x), yi, xi @@ -28,7 +28,8 @@ def contingency(double[:] x, int nx, double[:] y, int ny): cont[yi, xi] += 1 return cont -def find_threshold_entropy(double[:] x, double[:] y, np.intp_t[:] idx, +def find_threshold_entropy(const double[:] x, const double[:] y, + const np.intp_t[:] idx, int n_classes, int min_leaf): """ Find the threshold for continuous attribute values that maximizes @@ -89,8 +90,9 @@ def find_threshold_entropy(double[:] x, double[:] y, np.intp_t[:] idx, return (class_entro - best_entro) / N / log(2), x[idx[best_idx]] -def find_binarization_entropy(double[:, :] cont, double[:] class_distr, - double[:] val_distr, int min_leaf): +def find_binarization_entropy(const double[:, :] cont, + const double[:] class_distr, + const double[:] val_distr, int min_leaf): """ Find the split of discrete values into two groups that optimizes information gain. @@ -187,7 +189,9 @@ def find_binarization_entropy(double[:, :] cont, double[:] class_distr, return (class_entro - best_entro) / N / log(2), best_mapping -def find_threshold_MSE(double[:] x, double[:] y, np.intp_t[:] idx, int min_leaf): +def find_threshold_MSE(const double[:] x, + const double[:] y, + const np.intp_t[:] idx, int min_leaf): """ Find the threshold for continuous attribute values that minimizes MSE. 
@@ -232,7 +236,8 @@ def find_threshold_MSE(double[:] x, double[:] y, np.intp_t[:] idx, int min_leaf) return (best_inter - (sum * sum) / N) / N, x[idx[best_idx]] -def find_binarization_MSE(double[:] x, double[:] y, int n_values, int min_leaf): +def find_binarization_MSE(const double[:] x, + const double[:] y, int n_values, int min_leaf): """ Find the split of discrete values into two groups that minimizes the MSE. @@ -315,7 +320,9 @@ def find_binarization_MSE(double[:] x, double[:] y, int n_values, int min_leaf): return (best_inter - start_inter) / x.shape[0], best_mapping -def compute_grouped_MSE(double[:] x, double[:] y, int n_values, int min_leaf): +def compute_grouped_MSE(const double[:] x, + const double[:] y, + int n_values, int min_leaf): """ Compute the MSE decrease of the given split into groups. @@ -371,8 +378,10 @@ def compute_grouped_MSE(double[:] x, double[:] y, int n_values, int min_leaf): return (inter - sum * sum / n) / x.shape[0] -def compute_predictions(double[:, :] X, int[:] code, - double[:, :] values, double[:] thresholds): +def compute_predictions(const double[:, :] X, + const int[:] code, + const double[:, :] values, + const double[:] thresholds): """ Return the values (distributions, means and variances) stored in the nodes to which the tree classify the rows in X. @@ -419,8 +428,10 @@ def compute_predictions(double[:, :] X, int[:] code, return np.asarray(predictions) -def compute_predictions_csr(X, int[:] code, - double[:, :] values, double[:] thresholds): +def compute_predictions_csr(X, + const int[:] code, + const double[:, :] values, + const double[:] thresholds): """ Same as compute_predictions except for sparse data """ @@ -431,9 +442,9 @@ def compute_predictions_csr(X, int[:] code, double[: ,:] predictions = np.empty( (X.shape[0], values.shape[1]), dtype=np.float64) - double[:] data = X.data - np.int32_t[:] indptr = X.indptr - np.int32_t[:] indices = X.indices + const double[:] data = X.data + const np.int32_t[:] indptr = X.indptr + const np.int32_t[:] indices = X.indices int ind, attr, n_rows n_rows = X.shape[0] @@ -463,8 +474,10 @@ def compute_predictions_csr(X, int[:] code, predictions[i, j] = values[node_idx, j] return np.asarray(predictions) -def compute_predictions_csc(X, int[:] code, - double[:, :] values, double[:] thresholds): +def compute_predictions_csc(X, + const int[:] code, + const double[:, :] values, + const double[:] thresholds): """ Same as compute_predictions except for sparse data """ @@ -475,9 +488,9 @@ def compute_predictions_csc(X, int[:] code, double[: ,:] predictions = np.empty( (X.shape[0], values.shape[1]), dtype=np.float64) - double[:] data = X.data - np.int32_t[:] indptr = X.indptr - np.int32_t[:] indices = X.indices + const double[:] data = X.data + const np.int32_t[:] indptr = X.indptr + const np.int32_t[:] indices = X.indices int ind, attr, n_rows n_rows = X.shape[0] diff --git a/Orange/classification/tree.py b/Orange/classification/tree.py index fa8000ac175..764e801f53d 100644 --- a/Orange/classification/tree.py +++ b/Orange/classification/tree.py @@ -112,7 +112,7 @@ def _score_disc(): cont_entr = np.sum(cont * np.log(cont)) score = (class_entr - attr_entr + cont_entr) / n / np.log(2) score *= n / len(data) # punishment for missing values - branches = col_x + branches = col_x.copy() branches[np.isnan(branches)] = -1 if score == 0: return REJECT_ATTRIBUTE diff --git a/Orange/data/instance.py b/Orange/data/instance.py index c02ddf9f570..434dbe0ae55 100644 --- a/Orange/data/instance.py +++ b/Orange/data/instance.py @@ -34,11 
+34,12 @@ def __init__(self, domain, data=None, id=None): self._weight = 1 elif isinstance(data, Instance) and data.domain == domain: self._x = np.array(data._x) - self._y = np.array(data._y) + self._y = np.atleast_1d(np.array(data._y)) self._metas = np.array(data._metas) self._weight = data._weight else: self._x, self._y, self._metas = domain.convert(data) + self._y = np.atleast_1d(self._y) self._weight = 1 if id is not None: @@ -116,7 +117,10 @@ def __getitem__(self, key): if 0 <= idx < len(self._domain.attributes): value = self._x[idx] elif idx >= len(self._domain.attributes): - value = self._y[idx - len(self.domain.attributes)] + if self._y.ndim == 0: + value = self._y + else: + value = self._y[idx - len(self.domain.attributes)] else: value = self._metas[-1 - idx] var = self._domain[idx] diff --git a/Orange/data/pandas_compat.py b/Orange/data/pandas_compat.py index 401902f4a48..97e74952453 100644 --- a/Orange/data/pandas_compat.py +++ b/Orange/data/pandas_compat.py @@ -274,6 +274,10 @@ def vars_from_df(df, role=None, force_nominal=False): for var, col, expr in zip(Avars, Acols, Aexpr)]).T XYM.append(A) + # Let the tables share memory with pandas frame + if XYM[1] is not None and XYM[1].ndim == 2 and XYM[1].shape[1] == 1: + XYM[1] = XYM[1][:, 0] + return XYM, Domain(attrs, class_vars, metas) diff --git a/Orange/data/sql/table.py b/Orange/data/sql/table.py index d3e59f6d0c7..fa45433c433 100644 --- a/Orange/data/sql/table.py +++ b/Orange/data/sql/table.py @@ -556,7 +556,8 @@ def _filter_values(self, f): return t2 @classmethod - def from_table(cls, domain, source, row_indices=...): + def from_table(cls, domain, source, row_indices=..., *, copy=False): + # pylint: disable=unused-argument assert row_indices is ... table = source.copy() diff --git a/Orange/data/table.py b/Orange/data/table.py index 69935b1ac24..dcf951e95a0 100644 --- a/Orange/data/table.py +++ b/Orange/data/table.py @@ -1,10 +1,12 @@ import operator import os +import sys import threading import warnings import weakref import zlib from collections.abc import Iterable, Sequence, Sized +from contextlib import contextmanager from functools import reduce from itertools import chain from numbers import Real, Integral @@ -95,6 +97,7 @@ def __init__(self, table, row_index): if sp.issparse(self._y): self.sparse_y = sp.csr_matrix(self._y) self._y = np.asarray(self._y.todense())[0] + self._y = np.atleast_1d(self._y) self._metas = table.metas[row_index] if sp.issparse(self._metas): self.sparse_metas = sp.csr_matrix(self._metas) @@ -113,12 +116,16 @@ def weight(self, weight): self.table.W[self.row_index] = weight def set_class(self, value): + # pylint: disable=protected-access self._check_single_class() if not isinstance(value, Real): value = self.table.domain.class_var.to_val(value) - self._y[0] = value if self.sparse_y: self.table._Y[self.row_index, 0] = value + else: + self.table._Y[self.row_index] = value + if self.table._Y.ndim == 1: # if _y is not a view + self._y[0] = value def __setitem__(self, key, value): if not isinstance(key, Integral): @@ -131,17 +138,21 @@ def __setitem__(self, key, value): raise TypeError("Expected primitive value, got '%s'" % type(value).__name__) if key < len(self._x): - self._x[key] = value + # write to self.table.X to support table unlocking for live instances + self.table.X[self.row_index, key] = value if self.sparse_x is not None: - self.table.X[self.row_index, key] = value + self._x[key] = value else: - self._y[key - len(self._x)] = value if self.sparse_y is not None: self.table._Y[self.row_index, 
key - len(self._x)] = value + else: + self.table._Y[self.row_index] = value + if self.table._Y.ndim == 1: # if _y is not a view + self._y[0] = value else: - self._metas[-1 - key] = value - if self.sparse_metas: - self.table.metas[self.row_index, -1 - key] = value + self.table.metas[self.row_index, -1 - key] = value + if self.sparse_metas is not None: + self._metas[-1 - key] = value def _str(self, limit): def sp_values(matrix, variables): @@ -234,14 +245,15 @@ def get_subarray(self, source, row_indices, n_rows): arr = match_density(_subarray(source.metas, row_indices, [-1 - x for x in self.src_cols])) elif self.subarray_from == "Y": + Y = source.Y if source.Y.ndim == 2 else source.Y[:, None] arr = match_density(_subarray( - source._Y, row_indices, + Y, row_indices, [x - n_src_attrs for x in self.src_cols])) else: assert False if arr.dtype != self.dtype: arr = arr.astype(self.dtype) - assert arr.ndim == 2 + assert arr.ndim == 2 or self.subarray_from == "Y" and arr.ndim == 1 return arr def get_columns(self, source, row_indices, n_rows, out=None, target_indices=None): @@ -257,8 +269,13 @@ def get_columns(self, source, row_indices, n_rows, out=None, target_indices=None # converting to csc before instead of each column is faster # do not convert if not required if any(isinstance(x, int) for x in self.src_cols): - X = csc_matrix(source.X) if self.is_sparse else source.X - Y = csc_matrix(source._Y) if self.is_sparse else source._Y + X = source.X + Y = source.Y + if Y.ndim == 1: + Y = Y[:, None] + if self.is_sparse: + X = csc_matrix(X) + Y = csc_matrix(Y) if self.row_selection_needed: if row_indices is ...: @@ -346,12 +363,14 @@ class Table(Sequence, Storage): name = "untitled" domain = Domain([]) - X = _Y = metas = W = np.zeros((0, 0)) - X.setflags(write=False) + _X = _Y = _metas = _W = np.zeros((0, 0)) # pylint: disable=invalid-name + _unlocked = 0 # pylint: disable=invalid-name ids = np.zeros(0) ids.setflags(write=False) attributes = frozendict() + _Unlocked_X, _Unlocked_Y, _Unlocked_metas, _Unlocked_W = 1, 2, 4, 8 + @property def columns(self): """ @@ -365,22 +384,152 @@ def columns(self): _next_instance_id = 0 _next_instance_lock = Lock() + def _check_unlocked(self, partflag): + if not self._unlocked & partflag: + raise ValueError("Table is read-only unless unlocked") + @property - def Y(self): - if self._Y.shape[1] == 1: - return self._Y[:, 0] + def X(self): # pylint: disable=invalid-name + return self._X + + @X.setter + def X(self, value): + self._check_unlocked(self._Unlocked_X) + self._X = _dereferenced(value) + + @property + def Y(self): # pylint: disable=invalid-name return self._Y @Y.setter def Y(self, value): - if len(value.shape) == 1: - value = value[:, None] + self._check_unlocked(self._Unlocked_Y) if sp.issparse(value) and len(self) != value.shape[0]: value = value.T if sp.issparse(value): - value = value.toarray() + value = _dereferenced(value.toarray()) + if value.ndim == 2 and value.shape[1] == 1: + value = value[:, 0].copy() # no views! 
        self._Y = value

+    @property
+    def metas(self):
+        return self._metas
+
+    @metas.setter
+    def metas(self, value):
+        self._check_unlocked(self._Unlocked_metas)
+        self._metas = _dereferenced(value)
+
+    @property
+    def W(self):  # pylint: disable=invalid-name
+        return self._W
+
+    @W.setter
+    def W(self, value):
+        self._check_unlocked(self._Unlocked_W)
+        self._W = value
+
+    def __setstate__(self, state):
+        # Backward compatibility with pickles before table locking
+        self._unlocked = 0  # __dict__ seems to be cleared before calling __setstate__
+        with self.unlocked():
+            for k in ("X", "W", "metas"):
+                if k in state:
+                    setattr(self, k, state.pop(k))
+        self.__dict__.update(state)
+
+    def _lock_parts(self):
+        return ((self._X, self._Unlocked_X, "X"),
+                (self._Y, self._Unlocked_Y, "Y"),
+                (self._metas, self._Unlocked_metas, "metas"),
+                (self._W, self._Unlocked_W, "weights"))
+
+    def _update_locks(self, force=False, unlock_bases=()):
+        def sync(*xs):
+            for x in xs:
+                try:
+                    undo_on_fail.append((x, x.flags.writeable))
+                    x.flags.writeable = writeable
+                except ValueError:
+                    if force \
+                            and writeable \
+                            and x.base is not None \
+                            and not x.base.flags.writeable:
+                        x.base.flags.writeable = writeable
+                        x.flags.writeable = writeable
+                        forced_bases.append(x.base)
+                    else:
+                        raise
+
+        forced_bases = []
+        undo_on_fail = []
+        for base in unlock_bases:
+            base.flags.writeable = False
+        try:
+            for part, flag, _ in self._lock_parts():
+                if part is None:
+                    continue
+                writeable = bool(self._unlocked & flag)
+                if sp.isspmatrix_csr(part) or sp.isspmatrix_csc(part):
+                    sync(part.data, part.indices, part.indptr)
+                elif sp.isspmatrix_coo(part):
+                    sync(part.data, part.row, part.col)
+                elif sp.issparse(part):
+                    raise ValueError("Unsupported sparse data type")
+                else:
+                    sync(part)
+        except:
+            for part, flag in undo_on_fail:
+                part.flags.writeable = flag
+            raise
+        return tuple(forced_bases)
+
+    def __unlocked(self, *parts, force=False):
+        prev_state = self._unlocked
+        for part, flag, _ in self._lock_parts():
+            if not parts or any(ppart is part for ppart in parts):
+                self._unlocked |= flag
+        try:
+            forced_bases = self._update_locks(force)
+            yield
+        finally:
+            self._unlocked = prev_state
+            self._update_locks(unlock_bases=forced_bases)
+
+    def force_unlocked(self, *parts):
+        """
+        Unlock the given parts (default: all parts) of the table without
+        any checks.
+
+        Use with extreme caution. This is meant primarily for 3rd party
+        functions in Cython that expect a read-write buffer but do not
+        actually modify it.
+
+        The function will still fail to unlock and raise an exception if
+        the table contains a view into another table.
+        """
+        return contextmanager(self.__unlocked)(*parts, force=True)
+
+    def unlocked(self, *parts):
+        """
+        Unlock the given parts (default: all parts) of the table.
+
+        The caller must ensure that the table is safe to modify. The function
+        will raise an exception if the table contains a view into another table.
+ """ + def can_unlock(x): + if sp.issparse(x): + return can_unlock(x.data) + return x.flags.writeable or x.flags.owndata + + for part, flag, name in self._lock_parts(): + if not flag & self._unlocked \ + and (not parts or any(ppart is part for ppart in parts)) \ + and part is not None and not can_unlock(part): + raise ValueError(f"'{name}' is a view into another table " + "and cannot be unlocked") + return contextmanager(self.__unlocked)(*parts) + def __new__(cls, *args, **kwargs): def warn_deprecated(method): warnings.warn("Direct calls to Table's constructor are deprecated " @@ -390,7 +539,9 @@ def warn_deprecated(method): if not args: if not kwargs: - return super().__new__(cls) + self = super().__new__(cls) + self._unlocked = 0 + return self else: raise TypeError("Table() must not be called directly") @@ -405,7 +556,8 @@ def warn_deprecated(method): elif isinstance(args[0], Table): if len(args) > 1: raise TypeError("Table(table: Table) expects just one argument") - return cls.from_table(args[0].domain, args[0], **kwargs) + return cls.from_table(args[0].domain, args[0], + copy=kwargs.pop("copy", True), **kwargs) elif isinstance(args[0], Domain): domain, args = args[0], args[1:] if not args: @@ -426,9 +578,8 @@ def warn_deprecated(method): return cls.from_numpy(domain, *args, **kwargs) - def __init__(self, *args, **kwargs): - # So subclasses can expect to call super without breakage; noop - pass + def __init__(self, *args, **kwargs): # pylint: disable=unused-argument + self._update_locks() @classmethod def from_domain(cls, domain, n_rows=0, weights=False): @@ -448,19 +599,23 @@ def from_domain(cls, domain, n_rows=0, weights=False): self = cls() self.domain = domain self.n_rows = n_rows - self.X = np.zeros((n_rows, len(domain.attributes))) - self.Y = np.zeros((n_rows, len(domain.class_vars))) - if weights: - self.W = np.ones(n_rows) - else: - self.W = np.empty((n_rows, 0)) - self.metas = np.empty((n_rows, len(self.domain.metas)), object) - cls._init_ids(self) - self.attributes = {} + with self.unlocked(): + self.X = np.zeros((n_rows, len(domain.attributes))) + if len(domain.class_vars) != 1: + self.Y = np.zeros((n_rows, len(domain.class_vars))) + else: + self.Y = np.zeros(n_rows) + if weights: + self.W = np.ones(n_rows) + else: + self.W = np.empty((n_rows, 0)) + self.metas = np.empty((n_rows, len(self.domain.metas)), object) + cls._init_ids(self) + self.attributes = {} return self @classmethod - def from_table(cls, domain, source, row_indices=...): + def from_table(cls, domain, source, row_indices=..., *, copy=False): """ Create a new table from selected columns and/or rows of an existing one. The columns are chosen using a domain. The domain may also include @@ -475,6 +630,8 @@ def from_table(cls, domain, source, row_indices=...): :type source: Orange.data.Table :param row_indices: indices of the rows to include :type row_indices: a slice or a sequence + :param copy: if True, copy all tables (default: False, create views) + :type copy: bool :return: a new table :rtype: Orange.data.Table """ @@ -491,12 +648,13 @@ def from_table(cls, domain, source, row_indices=...): if cached is not None: return cached if domain is source.domain: - table = cls.from_table_rows(source, row_indices) + table = cls.from_table_rows(source, row_indices, copy=copy) # assure resulting domain is the instance passed on input table.domain = domain # since sparse flags are not considered when checking for # domain equality, fix manually. 
- table = assure_domain_conversion_sparsity(table, source) + with table.unlocked(): + table = assure_domain_conversion_sparsity(table, source) return table if row_indices is ...: @@ -520,78 +678,81 @@ def from_table(cls, domain, source, row_indices=...): # if an array can be a subarray of the input table, this needs to be done # on the whole table, because this avoids needless copies of contents - for array_conv in table_conversion.subarray: - out = array_conv.get_subarray(source, row_indices, n_rows) - setattr(self, array_conv.target, out) - - parts = {} + with self.unlocked(): + for array_conv in table_conversion.subarray: + out = array_conv.get_subarray(source, row_indices, n_rows) + setattr(self, array_conv.target, out) - for array_conv in table_conversion.columnwise: - if array_conv.is_sparse: - parts[array_conv.target] = [] - else: - # F-order enables faster writing to the array while accessing and - # matrix operations work with same speed (e.g. dot) - parts[array_conv.target] = \ - np.zeros((n_rows, len(array_conv.src_cols)), - order="F", dtype=array_conv.dtype) + parts = {} - if n_rows <= PART: for array_conv in table_conversion.columnwise: - out = array_conv.get_columns(source, row_indices, n_rows, - parts[array_conv.target], - ...) - setattr(self, array_conv.target, out) - else: - i_done = 0 - - while i_done < n_rows: - target_indices = slice(i_done, min(n_rows, i_done + PART)) - if row_indices is ...: - source_indices = target_indices - elif isinstance(row_indices, slice): - r = row_indices_range[target_indices] - source_indices = slice(r.start, r.stop, r.step) + if array_conv.is_sparse: + parts[array_conv.target] = [] else: - source_indices = row_indices[target_indices] - part_rows = min(n_rows, i_done+PART) - i_done + # F-order enables faster writing to the array while accessing and + # matrix operations work with same speed (e.g. dot) + parts[array_conv.target] = \ + np.zeros((n_rows, len(array_conv.src_cols)), + order="F", dtype=array_conv.dtype) + if n_rows <= PART: for array_conv in table_conversion.columnwise: - out = array_conv.get_columns(source, source_indices, part_rows, + out = array_conv.get_columns(source, row_indices, n_rows, parts[array_conv.target], - target_indices) - if array_conv.is_sparse: # dense arrays are populated in-place - parts[array_conv.target].append(out) - - i_done += PART - - # clear cache after a part is done - if new_cache: - _thread_local.conversion_cache = {} + ...) 
+ setattr(self, array_conv.target, out) + else: + i_done = 0 + + while i_done < n_rows: + target_indices = slice(i_done, min(n_rows, i_done + PART)) + if row_indices is ...: + source_indices = target_indices + elif isinstance(row_indices, slice): + r = row_indices_range[target_indices] + source_indices = slice(r.start, r.stop, r.step) + else: + source_indices = row_indices[target_indices] + part_rows = min(n_rows, i_done+PART) - i_done + + for array_conv in table_conversion.columnwise: + out = array_conv.get_columns(source, source_indices, part_rows, + parts[array_conv.target], + target_indices) + if array_conv.is_sparse: # dense arrays are populated in-place + parts[array_conv.target].append(out) + + i_done += PART + + # clear cache after a part is done + if new_cache: + _thread_local.conversion_cache = {} - for array_conv in table_conversion.columnwise: - cparts = parts[array_conv.target] - out = cparts if not array_conv.is_sparse else sp.vstack(cparts) - setattr(self, array_conv.target, out) + for array_conv in table_conversion.columnwise: + cparts = parts[array_conv.target] + out = cparts if not array_conv.is_sparse else sp.vstack(cparts) + setattr(self, array_conv.target, out) - if source.has_weights(): - self.W = source.W[row_indices] - else: - self.W = np.empty((n_rows, 0)) - self.name = getattr(source, 'name', '') - if hasattr(source, 'ids'): - self.ids = source.ids[row_indices] - else: - cls._init_ids(self) - self.attributes = getattr(source, 'attributes', {}) - _idcache_save(_thread_local.conversion_cache, (domain, source), self) + if source.has_weights(): + self.W = source.W[row_indices] + else: + self.W = np.empty((n_rows, 0)) + self.name = getattr(source, 'name', '') + if hasattr(source, 'ids'): + self.ids = source.ids[row_indices] + else: + cls._init_ids(self) + self.attributes = getattr(source, 'attributes', {}) + _idcache_save(_thread_local.conversion_cache, (domain, source), self) + if copy: + self.ensure_copy() return self finally: if new_cache: _thread_local.conversion_cache = None _thread_local.domain_cache = None - def transform(self, domain): + def transform(self, domain, copy=False): """ Construct a table with a different domain. @@ -614,10 +775,10 @@ def transform(self, domain): Returns: A new table """ - return type(self).from_table(domain, self) + return type(self).from_table(domain, self, copy=copy) @classmethod - def from_table_rows(cls, source, row_indices): + def from_table_rows(cls, source, row_indices, *, copy=False): """ Construct a new table by selecting rows from the source table. @@ -625,22 +786,31 @@ def from_table_rows(cls, source, row_indices): :type source: Orange.data.Table :param row_indices: indices of the rows to include :type row_indices: a slice or a sequence + :param copy: if True, copy all tables (default: False, create views) + :type copy: bool :return: a new table :rtype: Orange.data.Table """ + def get_rows(a): + a = a[row_indices] + if isinstance(row_indices, slice) or row_indices is ... 
or copy: + a = a.copy() + return a + self = cls() self.domain = source.domain - self.X = source.X[row_indices] - if self.X.ndim == 1: - self.X = self.X.reshape(-1, len(self.domain.attributes)) - self.Y = source._Y[row_indices] - self.metas = source.metas[row_indices] - if self.metas.ndim == 1: - self.metas = self.metas.reshape(-1, len(self.domain.metas)) - self.W = source.W[row_indices] - self.name = getattr(source, 'name', '') - self.ids = np.array(source.ids[row_indices]) - self.attributes = getattr(source, 'attributes', {}) + with self.unlocked(): + self.X = get_rows(source.X) + if self.X.ndim == 1: + self.X = self.X.reshape(-1, len(self.domain.attributes)) + self.Y = get_rows(source.Y) + self.metas = get_rows(source.metas) + if self.metas.ndim == 1: + self.metas = self.metas.reshape(-1, len(self.domain.metas)) + self.W = get_rows(source.W) + self.name = getattr(source, 'name', '') + self.ids = np.array(source.ids[row_indices]) + self.attributes = getattr(source, 'attributes', {}) return self @classmethod @@ -668,30 +838,37 @@ def from_numpy(cls, domain, X, Y=None, metas=None, W=None, metas, = _check_arrays(metas, dtype=object, shape_1=X.shape[0]) ids, = _check_arrays(ids, dtype=int, shape_1=X.shape[0]) - if Y is not None and Y.ndim == 1: - Y = Y.reshape(Y.shape[0], 1) if domain is None: domain = Domain.from_numpy(X, Y, metas) if Y is None: - if sp.issparse(X): + if not domain.class_vars or sp.issparse(X): Y = np.empty((X.shape[0], 0), dtype=np.float64) else: + own_data = X.flags.owndata and X.base is None Y = X[:, len(domain.attributes):] X = X[:, :len(domain.attributes)] + if own_data: + Y = Y.copy() + X = X.copy() if metas is None: metas = np.empty((X.shape[0], 0), object) if W is None or W.size == 0: W = np.empty((X.shape[0], 0)) - else: - W = W.reshape(W.size) + elif W.shape != (W.size, ): + W = W.reshape(W.size).copy() if X.shape[1] != len(domain.attributes): raise ValueError( "Invalid number of variable columns ({} != {})".format( X.shape[1], len(domain.attributes)) ) - if Y.shape[1] != len(domain.class_vars): + if Y.ndim == 1: + if not domain.class_var: + raise ValueError( + "Invalid number of class columns " + f"(1 != {len(domain.class_vars)})") + elif Y.shape[1] != len(domain.class_vars): raise ValueError( "Invalid number of class columns ({} != {})".format( Y.shape[1], len(domain.class_vars)) @@ -706,17 +883,18 @@ def from_numpy(cls, domain, X, Y=None, metas=None, W=None, "Parts of data contain different numbers of rows.") self = cls() - self.domain = domain - self.X = X - self.Y = Y - self.metas = metas - self.W = W - self.n_rows = self.X.shape[0] - if ids is None: - cls._init_ids(self) - else: - self.ids = ids - self.attributes = {} if attributes is None else attributes + with self.unlocked(): + self.domain = domain + self.X = X + self.Y = Y + self.metas = metas + self.W = W + self.n_rows = self.X.shape[0] + if ids is None: + cls._init_ids(self) + else: + self.ids = ids + self.attributes = {} if attributes is None else attributes return self @classmethod @@ -724,21 +902,24 @@ def from_list(cls, domain, rows, weights=None): if weights is not None and len(rows) != len(weights): raise ValueError("mismatching number of instances and weights") self = cls.from_domain(domain, len(rows), weights is not None) - attrs, classes = domain.attributes, domain.class_vars - metas = domain.metas - nattrs, ncls = len(domain.attributes), len(domain.class_vars) - for i, row in enumerate(rows): - if isinstance(row, Instance): - row = row.list - for j, (var, val) in enumerate(zip(attrs, row)): - 
                self.X[i, j] = var.to_val(val)
-            for j, (var, val) in enumerate(zip(classes, row[nattrs:])):
-                self._Y[i, j] = var.to_val(val)
-            for j, (var, val) in enumerate(zip(metas, row[nattrs + ncls:])):
-                self.metas[i, j] = var.to_val(val)
-        if weights is not None:
-            self.W = np.array(weights)
-        self.attributes = {}
+        all_vars = domain.variables + domain.metas
+        nattrs = len(domain.attributes)
+        nattrscls = len(domain.variables)
+        with self.unlocked():
+            for i, row in enumerate(rows):
+                if isinstance(row, Instance):
+                    row = row.list
+                vals = [var.to_val(val) for var, val in zip(all_vars, row)]
+                self.X[i] = vals[:nattrs]
+                if self._Y.ndim == 1:
+                    self._Y[i] = vals[nattrs] if nattrs < len(vals) else np.nan
+                else:
+                    self._Y[i] = vals[nattrs:nattrscls]
+                # for backward compatibility: allow omitting some (or all) metas
+                self.metas[i, :len(vals) - nattrscls] = vals[nattrscls:]
+            if weights is not None:
+                self.W = np.array(weights)
+            self.attributes = {}
         return self

     @classmethod
@@ -859,15 +1040,15 @@ def from_url(cls, url):
     # Set the row of table data matrices
     # noinspection PyProtectedMember
     def _set_row(self, example, row):
+        # pylint: disable=protected-access
         domain = self.domain
         if isinstance(example, Instance):
             if example.domain == domain:
-                if isinstance(example, RowInstance):
-                    self.X[row] = example._x
-                    self._Y[row] = example._y
+                self.X[row] = example._x
+                if self._Y.ndim == 1:
+                    self._Y[row] = float(example._y)
                 else:
-                    self.X[row] = example._x
-                    self._Y[row] = example._y
+                    self._Y[row] = np.atleast_1d(example._y)
                 self.metas[row] = example._metas
                 return
@@ -881,12 +1062,16 @@ def _set_row(self, example, row):
             type(self)._next_instance_id += 1

         else:
-            self.X[row] = [var.to_val(val)
-                           for var, val in zip(domain.attributes, example)]
-            self._Y[row] = [var.to_val(val)
-                            for var, val in
-                            zip(domain.class_vars,
-                                example[len(domain.attributes):])]
+            attrs = domain.attributes
+            if len(example) != len(domain.variables):
+                raise ValueError("invalid length")
+            self._X[row] = [var.to_val(val) for var, val in zip(attrs, example)]
+            if self._Y.ndim == 1:
+                self._Y[row] = domain.class_var.to_val(example[len(attrs)])
+            else:
+                self._Y[row] = [var.to_val(val)
+                                for var, val in zip(domain.class_vars,
+                                                    example[len(attrs):])]
             self.metas[row] = np.array([var.Unknown for var in domain.metas],
                                        dtype=object)
@@ -911,6 +1096,8 @@ def __getitem__(self, key):
             var = self.domain[col_idx]
             if 0 <= col_idx < len(self.domain.attributes):
                 val = self.X[row_idx, col_idx]
+            elif col_idx == len(self.domain.attributes) and self._Y.ndim == 1:
+                val = self._Y[row_idx]
             elif col_idx >= len(self.domain.attributes):
                 val = self._Y[row_idx, col_idx - len(self.domain.attributes)]
@@ -981,6 +1168,8 @@ def __setitem__(self, key, value):
             if col_idx >= 0:
                 if col_idx < self.X.shape[1]:
                     self.X[row_idx, col_idx] = val
+                elif self._Y.ndim == 1 and col_idx == self.X.shape[1]:
+                    self._Y[row_idx] = val
                 else:
                     self._Y[row_idx, col_idx - self.X.shape[1]] = val
             else:
@@ -995,12 +1184,16 @@ def __setitem__(self, key, value):
             if not attributes:
                 attributes = self.domain.attributes
             for var, col in zip(attributes, col_indices):
+                val = var.to_val(value)
                 if 0 <= col < n_attrs:
-                    self.X[row_idx, col] = var.to_val(value)
+                    self.X[row_idx, col] = val
                 elif col >= n_attrs:
-                    self._Y[row_idx, col - n_attrs] = var.to_val(value)
+                    if self._Y.ndim == 1 and col == n_attrs:
+                        self._Y[row_idx] = val
+                    else:
+                        self._Y[row_idx, col - n_attrs] = val
                 else:
-                    self.metas[row_idx, -1 - col] = var.to_val(value)
+                    self.metas[row_idx, -1 - col] = val
         else:
             attr_cols =
np.fromiter( (col for col in col_indices if 0 <= col < n_attrs), int) @@ -1018,7 +1211,14 @@ def __setitem__(self, key, value): if len(attr_cols): self.X[row_idx, attr_cols] = value if len(class_cols): - self._Y[row_idx, class_cols] = value + if self._Y.ndim == 1 and np.all(class_cols == 0): + if isinstance(value, np.ndarray): + yshape = self._Y[row_idx].shape + if value.shape != yshape: + value = value.reshape(yshape) + self._Y[row_idx] = value + else: + self._Y[row_idx, class_cols] = value if len(meta_cols): self.metas[row_idx, meta_cols] = value @@ -1156,39 +1356,41 @@ def add_column(self, variable, data, to_metas=None): table (Table): a new table with the additional column """ dom = self.domain - attrs, classes, metas = dom.attributes, dom.class_vars, dom.metas - if to_metas or not variable.is_primitive(): - metas += (variable, ) + attrs, classes, metavars = dom.attributes, dom.class_vars, dom.metas + to_metas = to_metas or not variable.is_primitive() + if to_metas: + metavars += (variable, ) else: attrs += (variable, ) - domain = Domain(attrs, classes, metas) + domain = Domain(attrs, classes, metavars) new_table = self.transform(domain) - new_table.get_column_view(variable)[0][:] = data + with new_table.unlocked(new_table.metas if to_metas else new_table.X): + new_table.get_column_view(variable)[0][:] = data return new_table def is_view(self): """ Return `True` if all arrays represent a view referring to another table """ - return ((not self.X.shape[-1] or self.X.base is not None) and + return ((not self._X.shape[-1] or self._X.base is not None) and (not self._Y.shape[-1] or self._Y.base is not None) and - (not self.metas.shape[-1] or self.metas.base is not None) and - (not self._weights.shape[-1] or self.W.base is not None)) + (not self._metas.shape[-1] or self._metas.base is not None) and + (not self._weights.shape[-1] or self._W.base is not None)) def is_copy(self): """ Return `True` if the table owns its data """ - return ((not self.X.shape[-1] or self.X.base is None) and + return ((not self._X.shape[-1] or self._X.base is None) and (self._Y.base is None) and - (self.metas.base is None) and - (self.W.base is None)) + (self._metas.base is None) and + (self._W.base is None)) def is_sparse(self): """ Return `True` if the table stores data in sparse format """ - return any(sp.issparse(i) for i in [self.X, self.Y, self.metas]) + return any(sp.issparse(i) for i in [self._X, self._Y, self._metas]) def ensure_copy(self): """ @@ -1200,14 +1402,14 @@ def is_view(x): # them creates copies in constructor we can skip this check here. return not sp.issparse(x) and x.base is not None - if is_view(self.X): - self.X = self.X.copy() + if is_view(self._X): + self._X = self._X.copy() if is_view(self._Y): self._Y = self._Y.copy() - if is_view(self.metas): - self.metas = self.metas.copy() - if is_view(self.W): - self.W = self.W.copy() + if is_view(self._metas): + self._metas = self._metas.copy() + if is_view(self._W): + self._W = self._W.copy() def copy(self): """ @@ -1290,11 +1492,11 @@ def checksum(self, include_metas=True): # (after pickling and unpickling such arrays, checksum changes) # Why, and should we fix it or remove it? 
"""Return a checksum over X, Y, metas and W.""" - cs = zlib.adler32(np.ascontiguousarray(self.X)) + cs = zlib.adler32(np.ascontiguousarray(self._X)) cs = zlib.adler32(np.ascontiguousarray(self._Y), cs) if include_metas: - cs = zlib.adler32(np.ascontiguousarray(self.metas), cs) - cs = zlib.adler32(np.ascontiguousarray(self.W), cs) + cs = zlib.adler32(np.ascontiguousarray(self._metas), cs) + cs = zlib.adler32(np.ascontiguousarray(self._W), cs) return cs def shuffle(self): @@ -1332,6 +1534,8 @@ def rx(M): if col_index >= 0: if col_index < self.X.shape[1]: col = rx(self.X[:, col_index]) + elif self._Y.ndim == 1 and col_index == self._X.shape[1]: + col = rx(self._Y) else: col = rx(self._Y[:, col_index - self.X.shape[1]]) else: @@ -1356,7 +1560,10 @@ def _sp_anynan(a): if sp.issparse(self._Y): remove += _sp_anynan(self._Y) else: - remove += bn.anynan(self._Y, axis=1) + if self._Y.ndim == 1: + remove += np.isnan(self._Y) + else: + remove += bn.anynan(self._Y, axis=1) if sp.issparse(self.metas): remove += _sp_anynan(self._metas) else: @@ -1388,9 +1595,15 @@ def _filter_has_class(self, negate=False): retain = (self._Y.indptr[1:] == self._Y.indptr[-1:] + self._Y.shape[1]) else: - retain = bn.anynan(self._Y, axis=1) + if self._Y.ndim == 1: + retain = np.isnan(self._Y) + else: + retain = bn.anynan(self._Y, axis=1) if not negate: retain = np.logical_not(retain) + # TODO: Decide whether to keep this or not; + if np.all(retain): + return self return self.from_table_rows(self, retain) def _filter_same_value(self, column, value, negate=False): @@ -1622,12 +1835,12 @@ def _compute_basic_stats(self, columns=None, if compute_variance: raise NotImplementedError("computation of variance is " "not implemented yet") - W = self.W if self.has_weights() else None + W = self._W if self.has_weights() else None rr = [] stats = [] if not columns: if self.domain.attributes: - rr.append(fast_stats(self.X, W)) + rr.append(fast_stats(self._X, W)) if self.domain.class_vars: rr.append(fast_stats(self._Y, W)) if include_metas and self.domain.metas: @@ -1639,11 +1852,14 @@ def _compute_basic_stats(self, columns=None, for column in columns: c = self.domain.index(column) if 0 <= c < nattrs: - S = fast_stats(self.X[:, [c]], W and W[:, [c]]) + S = fast_stats(self._X[:, [c]], W and W[:, [c]]) elif c >= nattrs: - S = fast_stats(self._Y[:, [c - nattrs]], W and W[:, [c - nattrs]]) + if self._Y.ndim == 1 and c == nattrs: + S = fast_stats(self._Y[:, None], W and W[:, None]) + else: + S = fast_stats(self._Y[:, [c - nattrs]], W and W[:, [c - nattrs]]) else: - S = fast_stats(self.metas[:, [-1 - c]], W and W[:, [-1 - c]]) + S = fast_stats(self._metas[:, [-1 - c]], W and W[:, [-1 - c]]) stats.append(S[0]) return stats @@ -1654,8 +1870,10 @@ def _compute_distributions(self, columns=None): columns = [self.domain.index(var) for var in columns] distributions = [] - if sp.issparse(self.X): - self.X = self.X.tocsc() + X = self.X + if sp.issparse(X): + X = X.tocsc() + W = self.W.ravel() if self.has_weights() else None @@ -1663,14 +1881,16 @@ def _compute_distributions(self, columns=None): variable = self.domain[col] # Select the correct data column from X, Y or metas - if 0 <= col < self.X.shape[1]: - x = self.X[:, col] + if 0 <= col < X.shape[1]: + x = X[:, col] elif col < 0: x = self.metas[:, col * (-1) - 1] if np.issubdtype(x.dtype, np.dtype(object)): x = x.astype(float) + elif self._Y.ndim == 1 and col == X.shape[1]: + x = self._Y else: - x = self._Y[:, col - self.X.shape[1]] + x = self._Y[:, col - X.shape[1]] if variable.is_discrete: dist, 
unknowns = bincount(x, weights=W, max_val=len(variable.values) - 1) @@ -1729,6 +1949,8 @@ def _compute_contingency(self, col_vars=None, row_var=None): row_data = self.X[:, row_indi] elif row_indi < 0: row_data = self.metas[:, -1 - row_indi] + elif self._Y.ndim == 1 and row_indi == n_atts: + row_data = self._Y else: row_data = self._Y[:, row_indi - n_atts] @@ -1774,8 +1996,9 @@ def _compute_contingency(self, col_vars=None, row_var=None): nans_rows[arr_i], nans[arr_i]) else: for col_i, arr_i, var in disc_vars: + col = arr if arr.ndim == 1 else arr[:, arr_i] contingencies[col_i] = contingency( - arr[:, arr_i].astype(float), + col.astype(float), row_data, len(var.values) - 1, n_rows - 1, W) cont_vars = [v for v in vars if v[2].is_continuous] @@ -1842,107 +2065,108 @@ def transpose(cls, table, feature_names_column="", # attributes # - classes and metas to attributes of attributes # - arbitrary meta column to feature names - self.X = table.X.T - if attr_index is not None: - self.X = np.delete(self.X, attr_index, 0) - if feature_names_column: - names = [str(row[feature_names_column]) for row in table] - progress_callback(0.1) - names = get_unique_names_duplicates(names) - progress_callback(0.3) - attributes = [ContinuousVariable(name) for name in names] - else: - places = int(np.ceil(np.log10(n_cols))) if n_cols else 1 - attributes = [ContinuousVariable(f"{feature_name} {i:0{places}}") - for i in range(1, n_cols + 1)] - progress_callback(0.4) - - if old_domain is not None and feature_names_column: - for i, _ in enumerate(attributes): - if attributes[i].name in old_domain: - var = old_domain[attributes[i].name] - attr = ContinuousVariable(var.name) if var.is_continuous \ - else DiscreteVariable(var.name, var.values) - attr.attributes = var.attributes.copy() - attributes[i] = attr - - def set_attributes_of_attributes(_vars, _table): - for i, variable in enumerate(_vars): - if variable.name == feature_names_column: - continue - for j, row in enumerate(_table): - value = variable.repr_val(row) if np.isscalar(row) \ - else row[i] if isinstance(row[i], str) \ - else variable.repr_val(row[i]) - - if value not in MISSING_VALUES: - attributes[j].attributes[variable.name] = value - - set_attributes_of_attributes(table.domain.class_vars, table.Y) - progress_callback(0.5) - set_attributes_of_attributes(table.domain.metas, table.metas) - - # weights - self.W = np.empty((self.n_rows, 0)) - - def get_table_from_attributes_of_attributes(_vars, _dtype=float): - T = np.empty((self.n_rows, len(_vars)), dtype=_dtype) - for i, _attr in enumerate(table_domain_attributes): - for j, _var in enumerate(_vars): - val = str(_attr.attributes.get(_var.name, "")) - if not _var.is_string: - val = np.nan if val in MISSING_VALUES else \ - _var.values.index(val) if \ - _var.is_discrete else float(val) - T[i, j] = val - return T - - # class_vars - attributes of attributes to class - from old domain - class_vars = [] - if old_domain is not None: - class_vars = old_domain.class_vars - self.Y = get_table_from_attributes_of_attributes(class_vars) - - # metas - # - feature names and attributes of attributes to metas - self.metas, metas = np.empty((self.n_rows, 0), dtype=object), [] - if meta_attr_name not in [m.name for m in table.domain.metas] and \ - table_domain_attributes: - self.metas = np.array([[a.name] for a in table_domain_attributes], - dtype=object) - metas.append(StringVariable(meta_attr_name)) - - names = chain.from_iterable(list(attr.attributes) - for attr in table_domain_attributes) - names = sorted(set(names) - {var.name 
for var in class_vars}) - progress_callback(0.6) - - def guessed_var(i, var_name): - orig_vals = M[:, i] - val_map, vals, var_type = Orange.data.io.guess_data_type(orig_vals) - values, variable = Orange.data.io.sanitize_variable( - val_map, vals, orig_vals, var_type, {}, name=var_name) - M[:, i] = values - return variable - - _metas = [StringVariable(n) for n in names] - if old_domain is not None: - _metas = [m for m in old_domain.metas if m.name != meta_attr_name] - M = get_table_from_attributes_of_attributes(_metas, _dtype=object) - progress_callback(0.7) - if old_domain is None: - _metas = [guessed_var(i, m.name) for i, m in enumerate(_metas)] - if _metas: - self.metas = np.hstack((self.metas, M)) - metas.extend(_metas) - - self.domain = Domain(attributes, class_vars, metas) - progress_callback(0.9) - cls._init_ids(self) - self.attributes = table.attributes.copy() - self.attributes["old_domain"] = table.domain - progress_callback(1) - return self + with self.unlocked(): + self.X = table.X.T + if attr_index is not None: + self.X = np.delete(self.X, attr_index, 0) + if feature_names_column: + names = [str(row[feature_names_column]) for row in table] + progress_callback(0.1) + names = get_unique_names_duplicates(names) + progress_callback(0.3) + attributes = [ContinuousVariable(name) for name in names] + else: + places = int(np.ceil(np.log10(n_cols))) if n_cols else 1 + attributes = [ContinuousVariable(f"{feature_name} {i:0{places}}") + for i in range(1, n_cols + 1)] + progress_callback(0.4) + + if old_domain is not None and feature_names_column: + for i, _ in enumerate(attributes): + if attributes[i].name in old_domain: + var = old_domain[attributes[i].name] + attr = ContinuousVariable(var.name) if var.is_continuous \ + else DiscreteVariable(var.name, var.values) + attr.attributes = var.attributes.copy() + attributes[i] = attr + + def set_attributes_of_attributes(_vars, _table): + for i, variable in enumerate(_vars): + if variable.name == feature_names_column: + continue + for j, row in enumerate(_table): + value = variable.repr_val(row) if np.isscalar(row) \ + else row[i] if isinstance(row[i], str) \ + else variable.repr_val(row[i]) + + if value not in MISSING_VALUES: + attributes[j].attributes[variable.name] = value + + set_attributes_of_attributes(table.domain.class_vars, table.Y) + progress_callback(0.5) + set_attributes_of_attributes(table.domain.metas, table.metas) + + # weights + self.W = np.empty((self.n_rows, 0)) + + def get_table_from_attributes_of_attributes(_vars, _dtype=float): + T = np.empty((self.n_rows, len(_vars)), dtype=_dtype) + for i, _attr in enumerate(table_domain_attributes): + for j, _var in enumerate(_vars): + val = str(_attr.attributes.get(_var.name, "")) + if not _var.is_string: + val = np.nan if val in MISSING_VALUES else \ + _var.values.index(val) if \ + _var.is_discrete else float(val) + T[i, j] = val + return T + + # class_vars - attributes of attributes to class - from old domain + class_vars = [] + if old_domain is not None: + class_vars = old_domain.class_vars + self.Y = get_table_from_attributes_of_attributes(class_vars) + + # metas + # - feature names and attributes of attributes to metas + self.metas, metas = np.empty((self.n_rows, 0), dtype=object), [] + if meta_attr_name not in [m.name for m in table.domain.metas] and \ + table_domain_attributes: + self.metas = np.array([[a.name] for a in table_domain_attributes], + dtype=object) + metas.append(StringVariable(meta_attr_name)) + + names = chain.from_iterable(list(attr.attributes) + for attr in 
table_domain_attributes) + names = sorted(set(names) - {var.name for var in class_vars}) + progress_callback(0.6) + + def guessed_var(i, var_name): + orig_vals = M[:, i] + val_map, vals, var_type = Orange.data.io.guess_data_type(orig_vals) + values, variable = Orange.data.io.sanitize_variable( + val_map, vals, orig_vals, var_type, {}, name=var_name) + M[:, i] = values + return variable + + _metas = [StringVariable(n) for n in names] + if old_domain is not None: + _metas = [m for m in old_domain.metas if m.name != meta_attr_name] + M = get_table_from_attributes_of_attributes(_metas, _dtype=object) + progress_callback(0.7) + if old_domain is None: + _metas = [guessed_var(i, m.name) for i, m in enumerate(_metas)] + if _metas: + self.metas = np.hstack((self.metas, M)) + metas.extend(_metas) + + self.domain = Domain(attributes, class_vars, metas) + progress_callback(0.9) + cls._init_ids(self) + self.attributes = table.attributes.copy() + self.attributes["old_domain"] = table.domain + progress_callback(1) + return self def to_sparse(self, sparse_attributes=True, sparse_class=False, sparse_metas=False): @@ -1979,6 +2203,19 @@ def densify(features): return t +def _dereferenced(array): + # CSR and CSC matrices are constructed so that array.data is a + # view to a base, which prevents unlocking them. Therefore, if + # sparse matrix doesn't own its data, but its base array is + # referenced only by this matrix, we copy it. This doesn't + # increase memory use, but allows unlocking. + if sp.issparse(array) \ + and array.data.base is not None \ + and sys.getrefcount(array.data.base) == 2: # 2 = 1 real + 1 for arg + array.data = array.data.copy() + return array + + def _check_arrays(*arrays, dtype=None, shape_1=None): checked = [] if not len(arrays): @@ -2006,6 +2243,7 @@ def ninstances(array): if not (sp.isspmatrix_csr(array) or sp.isspmatrix_csc(array)): array = array.tocsr() array.data = np.asarray(array.data) + array = _dereferenced(array) has_inf = _check_inf(array.data) else: if dtype is not None: @@ -2029,10 +2267,31 @@ def _check_inf(array): def _subarray(arr, rows, cols): rows = _optimize_indices(rows, arr.shape[0]) + if arr.ndim == 1: + return arr[rows] cols = _optimize_indices(cols, arr.shape[1]) return arr[_rxc_ix(rows, cols)] +# TODO: Pick that one or the one above +def _subarray2(arr, rows, cols): + rows = _optimize_indices(rows, arr.shape[0]) + if arr.ndim == 1: + sub_arr = arr[rows] + else: + cols = _optimize_indices(cols, arr.shape[1]) + sub_arr = arr[_rxc_ix(rows, cols)] + # Don't return a view when it is the same as the original + if isinstance(sub_arr, np.ndarray) \ + and type(sub_arr) is type(arr) \ + and sub_arr.base is arr \ + and sub_arr.shape == arr.shape \ + and sub_arr.strides == arr.strides: + return arr + else: + return sub_arr + + def _optimize_indices(indices, maxlen): """ Convert integer indices to slice if possible. 
It only converts increasing diff --git a/Orange/data/tests/test_pandas.py b/Orange/data/tests/test_pandas.py index 6182c702389..97714bba76e 100644 --- a/Orange/data/tests/test_pandas.py +++ b/Orange/data/tests/test_pandas.py @@ -373,7 +373,7 @@ def setUp(self): [0, 1, 0, 1, 1, 2, 1] + [0, 0, 0, 0, 4, 1, 1] + "a b c d e f g".split() + - list("ABCDEF") + [""], dtype=object).reshape(-1, 7).T + list("ABCDEF") + [""], dtype=object).reshape(-1, 7).T.copy() self.table = Table.from_numpy( self.domain, np.array( @@ -429,10 +429,12 @@ def test_to_dfs(self): ), 1) def test_amend(self): - df = self.table.X_df - df.iloc[0][0] = 0 + with self.table.unlocked(): + df = self.table.X_df + df.iloc[0][0] = 0 X = self.table.X - self.table.X_df = df + with self.table.unlocked(): + self.table.X_df = df self.assertTrue(np.shares_memory(df.values, X)) def test_amend_dimension_mismatch(self): diff --git a/Orange/data/tests/test_table.py b/Orange/data/tests/test_table.py index 836110276b3..dd0a3407248 100644 --- a/Orange/data/tests/test_table.py +++ b/Orange/data/tests/test_table.py @@ -162,10 +162,12 @@ def test_concatenate_horizontal(self): tab1 = self._new_table((a, b), (c, ), (), 0) tab1.attributes = dict(a=5, b=7) tab2 = self._new_table((d, ), (e, ), (), 1000) - tab2.W = np.arange(5) + with tab2.unlocked(): + tab2.W = np.arange(5) tab3 = self._new_table((f, g), (), (), 2000) tab3.attributes = dict(a=1, c=4) - tab3.W = np.arange(5, 10) + with tab3.unlocked(): + tab3.W = np.arange(5, 10) joined = Table.concatenate((tab1, tab2, tab3), axis=1) domain = joined.domain self.assertEqual(domain.attributes, (a, b, d, f, g)) @@ -265,7 +267,7 @@ def setUp(self): [0, 1, 0, 1, 1, np.nan, 1] + [0, 0, 0, 0, np.nan, 1, 1] + "a b c d e f g".split() + - list("ABCDEF") + [""], dtype=object).reshape(-1, 7).T + list("ABCDEF") + [""], dtype=object).reshape(-1, 7).T.copy() self.table = Table.from_numpy( self.domain, np.array( @@ -331,7 +333,8 @@ def test_row_filter_continuous(self): self.assertEqual(list(filtered.metas[:, -2].flatten()), ["a"]) def test_row_filter_string(self): - self.table.metas[:, -1] = self.table.metas[::-1, -2] + with self.table.unlocked(): + self.table.metas[:, -1] = self.table.metas[::-1, -2] val_filter = Values([ FilterString(None, FilterString.Between, "c", "e")]) filtered = val_filter(self.table) diff --git a/Orange/distance/distance.py b/Orange/distance/distance.py index aca1679d4a8..a08752d6b9d 100644 --- a/Orange/distance/distance.py +++ b/Orange/distance/distance.py @@ -516,9 +516,9 @@ def _compute_sparse(self, x1, x2=None): symmetric = x2 is None if symmetric: x2 = x1 - x1 = sp.csr_matrix(x1) + x1 = sp.csr_matrix(x1).copy() x1.eliminate_zeros() - x2 = sp.csr_matrix(x2) + x2 = sp.csr_matrix(x2).copy() x2.eliminate_zeros() n, m = x1.shape[0], x2.shape[0] matrix = np.zeros((n, m)) diff --git a/Orange/distance/tests/test_distance.py b/Orange/distance/tests/test_distance.py index 2ff10ce5678..81e586dfe4a 100644 --- a/Orange/distance/tests/test_distance.py +++ b/Orange/distance/tests/test_distance.py @@ -81,7 +81,8 @@ def is_same(d1, d2, fit1=None, fit2=None): data_const = Table(domain, np.hstack((X, np.ones((n, 1))))) data_nan = Table(domain, np.hstack((X, np.full((n, 1), np.nan)))) data_nan_1 = Table(domain, np.hstack((X, np.full((n, 1), np.nan)))) - data_nan_1.X[0, -1] = 1 + with data_nan_1.unlocked(): + data_nan_1.X[0, -1] = 1 is_same(data, data_const) is_same(data, data_nan) is_same(data, data_nan_1) @@ -176,7 +177,8 @@ def test_euclidean_disc(self): [2, 0, 2], [3, 2, 0]]))) - data.X[1, 0] = np.nan + with 
data.unlocked(): + data.X[1, 0] = np.nan model = distance.Euclidean().fit(data) assert_almost_equal(model.dist_missing_disc, [[1/2, 1/2, 1, 1], @@ -186,13 +188,15 @@ def test_euclidean_disc(self): assert_almost_equal(model.dist_missing2_disc, [1 - 2/4, 1 - 3/9, 1 - 5/9]) - dist = model(data) + with data.unlocked(): + dist = model(data) assert_almost_equal(dist, np.sqrt(np.array([[0, 2.5, 3], [2.5, 0, 1.5], [3, 1.5, 0]]))) - data.X[0, 0] = np.nan + with data.unlocked(): + data.X[0, 0] = np.nan model = distance.Euclidean().fit(data) assert_almost_equal(model.dist_missing_disc, [[1, 0, 1, 1], @@ -208,7 +212,8 @@ def test_euclidean_disc(self): [2, 1, 0]]))) data = self.disc_data4 - data.X[:2, 0] = np.nan + with data.unlocked(): + data.X[:2, 0] = np.nan model = distance.Euclidean().fit(data) assert_almost_equal(model.dist_missing_disc, @@ -237,7 +242,8 @@ def test_euclidean_cont(self): [5, 21, 0, 41], [38, 82, 41, 0]]))) - data.X[1, 0] = np.nan + with data.unlocked(): + data.X[1, 0] = np.nan dist = distance.Euclidean(data, axis=1, normalize=False) assert_almost_equal( dist, @@ -246,7 +252,8 @@ def test_euclidean_cont(self): [2.236067977, 5.385164807, 0, 6.403124237], [6.164414003, 6.480740698, 6.403124237, 0]]) - data.X[0, 0] = np.nan + with data.unlocked(): + data.X[0, 0] = np.nan dist = distance.Euclidean(data, axis=1, normalize=False) assert_almost_equal( dist, @@ -280,7 +287,8 @@ def test_euclidean_cont_normalized(self): [1.146423008, 2.068662631, 0, 1.956673562], [1.621286967, 3.035242727, 1.956673562, 0]]) - data.X[1, 0] = np.nan + with data.unlocked(): + data.X[1, 0] = np.nan model = distance.Euclidean(axis=1, normalize=True).fit(data) assert_almost_equal(model.means, [3, 2.75, 1.5]) assert_almost_equal(model.vars, [8, 2.1875, 1.25]) @@ -292,7 +300,8 @@ def test_euclidean_cont_normalized(self): [1.146423008, 2.192519751, 0, 2.019547333], [1.696635326, 2.675283697, 2.019547333, 0]]) - data.X[0, 0] = np.nan + with data.unlocked(): + data.X[0, 0] = np.nan model = distance.Euclidean(axis=1, normalize=True).fit(data) assert_almost_equal(model.means, [4, 2.75, 1.5]) assert_almost_equal(model.vars, [9, 2.1875, 1.25]) @@ -315,7 +324,8 @@ def test_euclidean_cols(self): [8.062257748, 0, 5.196152423], [4.242640687, 5.196152423, 0]]) - data.X[1, 1] = np.nan + with data.unlocked(): + data.X[1, 1] = np.nan dist = distance.Euclidean(data, axis=0, normalize=False) assert_almost_equal( dist, @@ -323,7 +333,8 @@ def test_euclidean_cols(self): [6.218252702, 0, 2.581988897], [4.242640687, 2.581988897, 0]]) - data.X[1, 0] = np.nan + with data.unlocked(): + data.X[1, 0] = np.nan dist = distance.Euclidean(data, axis=0, normalize=False) assert_almost_equal( dist, @@ -342,7 +353,8 @@ def test_euclidean_cols_normalized(self): [2.455273959, 0, 2.473176308], [0.649839392, 2.473176308, 0]]) - data.X[1, 1] = np.nan + with data.unlocked(): + data.X[1, 1] = np.nan dist = distance.Euclidean(data, axis=0, normalize=True) assert_almost_equal( dist, @@ -350,7 +362,8 @@ def test_euclidean_cols_normalized(self): [2, 0, 1.704275472], [0.649839392, 1.704275472, 0]]) - data.X[1, 0] = np.nan + with data.unlocked(): + data.X[1, 0] = np.nan dist = distance.Euclidean(data, axis=0, normalize=True) assert_almost_equal( dist, @@ -451,7 +464,8 @@ def test_manhattan_disc(self): [2, 0, 2], [3, 2, 0]]) - data.X[1, 0] = np.nan + with data.unlocked(): + data.X[1, 0] = np.nan model = distance.Manhattan().fit(data) assert_almost_equal(model.dist_missing_disc, [[1/2, 1/2, 1, 1], @@ -466,7 +480,8 @@ def test_manhattan_disc(self): [2.5, 0, 1.5], 
[3, 1.5, 0]]) - data.X[0, 0] = np.nan + with data.unlocked(): + data.X[0, 0] = np.nan model = distance.Manhattan().fit(data) assert_almost_equal(model.dist_missing_disc, [[1, 0, 1, 1], @@ -482,7 +497,8 @@ def test_manhattan_disc(self): [2, 1, 0]]) data = self.disc_data4 - data.X[:2, 0] = np.nan + with data.unlocked(): + data.X[:2, 0] = np.nan model = distance.Manhattan().fit(data) assert_almost_equal(model.dist_missing_disc, [[1/2, 1/2, 1, 1], @@ -510,7 +526,8 @@ def test_manhattan_cont(self): [6, 5, 0, 13], [9, 16, 13, 0]]) - data.X[1, 0] = np.nan + with data.unlocked(): + data.X[1, 0] = np.nan dist = distance.Manhattan(data, axis=1, normalize=False) assert_almost_equal( dist, @@ -519,7 +536,8 @@ def test_manhattan_cont(self): [6, 3, 0, 13], [9, 14, 13, 0]]) - data.X[0, 0] = np.nan + with data.unlocked(): + data.X[0, 0] = np.nan dist = distance.Manhattan(data, axis=1, normalize=False) assert_almost_equal( dist, @@ -553,7 +571,8 @@ def test_manhattan_cont_normalized(self): [1.833333333, 1.75, 0, 4.166666667], [3, 5.416666667, 4.166666667, 0]]) - data.X[1, 0] = np.nan + with data.unlocked(): + data.X[1, 0] = np.nan model = distance.Manhattan(axis=1, normalize=True).fit(data) assert_almost_equal(model.medians, [2, 4.5, 1.5]) assert_almost_equal(model.mads, [1, 2, 1]) @@ -566,7 +585,8 @@ def test_manhattan_cont_normalized(self): [2, 1.25, 0, 5], [4, 5.75, 5, 0]]) - data.X[0, 0] = np.nan + with data.unlocked(): + data.X[0, 0] = np.nan model = distance.Manhattan(axis=1, normalize=True).fit(data) assert_almost_equal(model.medians, [4.5, 4.5, 1.5]) assert_almost_equal(model.mads, [2.5, 2, 1]) @@ -590,7 +610,8 @@ def test_manhattan_cols(self): [20, 0, 15], [7, 15, 0]]) - data.X[1, 1] = np.nan + with data.unlocked(): + data.X[1, 1] = np.nan dist = distance.Manhattan(data, axis=0, normalize=False) assert_almost_equal( dist, @@ -598,7 +619,8 @@ def test_manhattan_cols(self): [19, 0, 14], [7, 14, 0]]) - data.X[1, 0] = np.nan + with data.unlocked(): + data.X[1, 0] = np.nan dist = distance.Manhattan(data, axis=0, normalize=False) assert_almost_equal( dist, @@ -618,7 +640,8 @@ def test_manhattan_cols_normalized(self): [4.5833333, 0, 4.25], [2, 4.25, 0]]) - data.X[1, 1] = np.nan + with data.unlocked(): + data.X[1, 1] = np.nan dist = distance.Manhattan(data, axis=0, normalize=True) assert_almost_equal( dist, @@ -626,7 +649,8 @@ def test_manhattan_cols_normalized(self): [4.6666667, 0, 4], [2, 4, 0]]) - data.X[1, 0] = np.nan + with data.unlocked(): + data.X[1, 0] = np.nan dist = distance.Manhattan(data, axis=0, normalize=True) assert_almost_equal( dist, @@ -638,7 +662,8 @@ def test_manhattan_mixed(self): assert_almost_equal = np.testing.assert_almost_equal data = self.mixed_data - data.X[2, 0] = 2 # prevent mads[0] = 0 + with data.unlocked(): + data.X[2, 0] = 2 # prevent mads[0] = 0 model = distance.Manhattan(axis=1, normalize=True).fit(data) assert_almost_equal(model.medians, [1, 3, 1]) assert_almost_equal(model.mads, [1, 2, 1]) @@ -699,9 +724,10 @@ def test_no_data(self): def test_cosine_disc(self): assert_almost_equal = np.testing.assert_almost_equal data = self.disc_data - data.X = np.array([[1, 0, 0], - [0, 1, 1], - [1, 3, 0]], dtype=float) + with data.unlocked(): + data.X = np.array([[1, 0, 0], + [0, 1, 1], + [1, 3, 0]], dtype=float) model = distance.Cosine().fit(data) assert_almost_equal(model.means, [2 / 3, 2 / 3, 1 / 3]) @@ -711,7 +737,8 @@ def test_cosine_disc(self): [0, 1, 0.5], [1 / sqrt(2), 0.5, 1]])) - data.X[1, 1] = np.nan + with data.unlocked(): + data.X[1, 1] = np.nan model = 
distance.Cosine().fit(data) assert_almost_equal(model.means, [2 / 3, 1 / 2, 1 / 3]) dist = model(data) @@ -721,10 +748,11 @@ def test_cosine_disc(self): [0, 1, 0.5 / sqrt(1.25) / sqrt(2)], [1 / sqrt(2), 0.5 / sqrt(1.25) / sqrt(2), 1]])) - data.X = np.array([[1, 0, 0], - [0, np.nan, 1], - [1, np.nan, 1], - [1, 3, 1]]) + with data.unlocked(): + data.X = np.array([[1, 0, 0], + [0, np.nan, 1], + [1, np.nan, 1], + [1, 3, 1]]) model = distance.Cosine().fit(data) dist = model(data) assert_almost_equal(model.means, [0.75, 0.5, 0.75]) @@ -746,7 +774,8 @@ def test_cosine_cont(self): [0.355097978, 0.925279678, 0.12011731, 0]] ) - data.X[1, 0] = np.nan + with data.unlocked(): + data.X[1, 0] = np.nan dist = distance.Cosine(data, axis=1) assert_almost_equal( dist, @@ -755,7 +784,8 @@ def test_cosine_cont(self): [0.0741799, 0.207881966, 0, 0.12011731], [0.355097978, 0.324809395, 0.12011731, 0]]) - data.X[0, 0] = np.nan + with data.unlocked(): + data.X[0, 0] = np.nan dist = distance.Cosine(data, axis=1) assert_almost_equal( dist, @@ -767,9 +797,10 @@ def test_cosine_cont(self): def test_cosine_mixed(self): assert_almost_equal = np.testing.assert_almost_equal data = self.mixed_data - data.X = np.array([[1, 3, 2, 1, 0, 0], - [-1, 5, 0, 0, 1, 1], - [1, 1, 1, 1, 3, 0]], dtype=float) + with data.unlocked(): + data.X = np.array([[1, 3, 2, 1, 0, 0], + [-1, 5, 0, 0, 1, 1], + [1, 1, 1, 1, 3, 0]], dtype=float) model = distance.Cosine(axis=1).fit(data) assert_almost_equal(model.means, [1/3, 3, 1, 2/3, 2/3, 1/3]) @@ -782,8 +813,9 @@ def test_cosine_mixed(self): def test_two_tables(self): assert_almost_equal = np.testing.assert_almost_equal - self.cont_data.X[1, 0] = np.nan - self.cont_data2.X[1, 0] = np.nan + with self.cont_data.unlocked(), self.cont_data2.unlocked(): + self.cont_data.X[1, 0] = np.nan + self.cont_data2.X[1, 0] = np.nan dist = distance.Cosine(self.cont_data, self.cont_data2) assert_almost_equal( @@ -816,7 +848,8 @@ def test_cosine_cols(self): [0.711324865, 0, 0.44365136], [0.11050082, 0.44365136, 0]]) - data.X[1, 1] = np.nan + with data.unlocked(): + data.X[1, 1] = np.nan dist = distance.Cosine(data, axis=0, normalize=False) assert_almost_equal( dist, @@ -824,8 +857,9 @@ def test_cosine_cols(self): [0.47702364, 0, 0.181076975], [0.11050082, 0.181076975, 0]]) - data.X[1, 0] = np.nan - data.X[1, 2] = 2 + with data.unlocked(): + data.X[1, 0] = np.nan + data.X[1, 2] = 2 dist = distance.Cosine(data, axis=0, normalize=False) assert_almost_equal( dist, @@ -859,7 +893,8 @@ def test_jaccard_rows(self): [0, 1/3, 1/2, 1]])) X = self.data.X - X[1, 0] = X[2, 0] = X[3, 1] = np.nan + with self.data.unlocked(): + X[1, 0] = X[2, 0] = X[3, 1] = np.nan model = distance.Jaccard().fit(self.data) assert_almost_equal(model.ps, np.array([0.5, 2/3, 0.75])) @@ -881,10 +916,11 @@ def test_jaccard_cols(self): [1/4, 1, 2/3], [1/2, 2/3, 1]])) - self.data.X = np.array([[0, 1, 1], - [np.nan, np.nan, 1], - [np.nan, 0, 1], - [1, 1, 0]]) + with self.data.unlocked(): + self.data.X = np.array([[0, 1, 1], + [np.nan, np.nan, 1], + [np.nan, 0, 1], + [1, 1, 0]]) model = distance.Jaccard(axis=0).fit(self.data) assert_almost_equal(model.ps, [0.5, 2/3, 0.75]) assert_almost_equal( diff --git a/Orange/ensembles/stack.py b/Orange/ensembles/stack.py index 2cf918f66ef..4f8078acd71 100644 --- a/Orange/ensembles/stack.py +++ b/Orange/ensembles/stack.py @@ -30,8 +30,9 @@ def predict_storage(self, data): X = np.column_stack(pred) Y = np.repeat(np.nan, X.shape[0]) stacked_data = data.transform(self.aggregate.domain) - stacked_data.X = X - stacked_data.Y 
= Y + with stacked_data.unlocked(): + stacked_data.X = X + stacked_data.Y = Y return self.aggregate( stacked_data, Model.ValueProbs if self.use_prob else Model.Value) @@ -80,9 +81,10 @@ def fit_storage(self, data): dom = Domain([ContinuousVariable('f{}'.format(i + 1)) for i in range(X.shape[1])], data.domain.class_var) - stacked_data = data.transform(dom) - stacked_data.X = X - stacked_data.Y = res.actual + stacked_data = data.transform(dom, copy=True) + with stacked_data.unlocked(): + stacked_data.X = X + stacked_data.Y = res.actual models = [l(data) for l in self.learners] aggregate_model = self.aggregate(stacked_data) return StackedModel(models, aggregate_model, use_prob=use_prob, diff --git a/Orange/evaluation/testing.py b/Orange/evaluation/testing.py index 400bef53047..8d37dafa232 100644 --- a/Orange/evaluation/testing.py +++ b/Orange/evaluation/testing.py @@ -311,7 +311,8 @@ def get_augmented_data(self, model_names, attrs = data.domain.attributes if include_attrs else [] domain = Domain(attrs, data.domain.class_vars, metas=new_meta_attr) predictions = data.transform(domain) - predictions.metas = new_meta_vals + with predictions.unlocked(predictions.metas): + predictions.metas = new_meta_vals predictions.name = data.name return predictions diff --git a/Orange/preprocess/_relieff.pyx b/Orange/preprocess/_relieff.pyx index 67b2c7a7712..11da65af362 100644 --- a/Orange/preprocess/_relieff.pyx +++ b/Orange/preprocess/_relieff.pyx @@ -362,7 +362,12 @@ cdef tuple prepare(X, y, is_discrete, contingencies): row_ptp[row_ptp == 0] = np.inf # Avoid zero-division X[:, is_continuous] -= row_min[is_continuous] X[:, is_continuous] /= row_ptp[is_continuous] - y = np.array(y, dtype=np.float64) + if y.ndim > 1: + if y.shape[1] > 1: + raise ValueError("ReliefF expects a single class") + y = np.array(y[:, 0], dtype=np.float64) + else: + y = np.array(y, dtype=np.float64) is_defined = np.logical_not(np.isnan(y)) X = X[is_defined] y = y[is_defined] diff --git a/Orange/preprocess/preprocess.py b/Orange/preprocess/preprocess.py index 1334fbc8c0e..aa4724801a9 100644 --- a/Orange/preprocess/preprocess.py +++ b/Orange/preprocess/preprocess.py @@ -170,8 +170,9 @@ def __call__(self, data): assert X.shape[1] == len(features) domain = Orange.data.Domain(features, data.domain.class_vars, data.domain.metas) - new_data = data.transform(domain) - new_data.X = X + new_data = data.transform(domain, copy=True) + with new_data.unlocked(new_data.X): + new_data.X = X return new_data @@ -414,12 +415,13 @@ def __call__(self, data): rstate = np.random.RandomState(self.rand_seed) # ensure the same seed is not used to shuffle X and Y at the same time r1, r2, r3 = rstate.randint(0, 2 ** 32 - 1, size=3, dtype=np.int64) - if self.rand_type & Randomize.RandomizeClasses: - new_data.Y = self.randomize(new_data.Y, r1) - if self.rand_type & Randomize.RandomizeAttributes: - new_data.X = self.randomize(new_data.X, r2) - if self.rand_type & Randomize.RandomizeMetas: - new_data.metas = self.randomize(new_data.metas, r3) + with new_data.unlocked(): + if self.rand_type & Randomize.RandomizeClasses: + new_data.Y = self.randomize(new_data.Y, r1) + if self.rand_type & Randomize.RandomizeAttributes: + new_data.X = self.randomize(new_data.X, r2) + if self.rand_type & Randomize.RandomizeMetas: + new_data.metas = self.randomize(new_data.metas, r3) return new_data @staticmethod diff --git a/Orange/preprocess/transformation.py b/Orange/preprocess/transformation.py index 01ea719b5ce..43dcb4bf79a 100644 --- a/Orange/preprocess/transformation.py +++ 
b/Orange/preprocess/transformation.py @@ -36,7 +36,7 @@ def __call__(self, data): col = data.X else: col = data.metas - if not sp.issparse(col): + if not sp.issparse(col) and col.ndim > 1: col = col.squeeze(axis=1) transformed = self.transform(col) if inst: diff --git a/Orange/statistics/util.py b/Orange/statistics/util.py index a8080b6a3e0..87af6833f7a 100644 --- a/Orange/statistics/util.py +++ b/Orange/statistics/util.py @@ -355,7 +355,8 @@ def weighted_mean(): np.nanmin(X, axis=0), np.nanmax(X, axis=0), np.nanmean(X, axis=0) if not weighted else weighted_mean(), - np.nanvar(X, axis=0) if compute_variance else np.zeros(X.shape[1]), + np.nanvar(X, axis=0) if compute_variance else \ + np.zeros(X.shape[1] if X.ndim == 2 else 1), nans, X.shape[0] - nans)) elif is_sparse and X.size: diff --git a/Orange/tests/test_classification.py b/Orange/tests/test_classification.py index 95081652ac0..5f67865d218 100644 --- a/Orange/tests/test_classification.py +++ b/Orange/tests/test_classification.py @@ -324,7 +324,8 @@ def test_multinomial(self): def test_nan_columns(self): data = Orange.data.Table("iris") - data.X[:, (1, 3)] = np.NaN + with data.unlocked(): + data.X[:, (1, 3)] = np.NaN lr = LogisticRegressionLearner() cv = CrossValidation(k=2, store_models=True) res = cv(data, [lr]) @@ -364,7 +365,7 @@ class UnknownValuesInPrediction(unittest.TestCase): def test_unknown(self): table = Table("iris") tree = LogisticRegressionLearner()(table) - tree([1, 2, None]) + tree([1, 2, None, 4]) def test_missing_class(self): table = Table(test_filename("datasets/adult_sample_missing")) @@ -405,6 +406,8 @@ def test_all_models_work_after_unpickling(self): if isinstance(learner, _RuleLearner): continue with self.subTest(learner.__name__): + if "RandomForest" not in learner.__name__: + continue learner = learner() for ds in datasets: model = learner(ds) diff --git a/Orange/tests/test_clustering_dbscan.py b/Orange/tests/test_clustering_dbscan.py index 3286f5a714d..714ca000838 100644 --- a/Orange/tests/test_clustering_dbscan.py +++ b/Orange/tests/test_clustering_dbscan.py @@ -42,13 +42,15 @@ def test_predict_numpy(self): self.assertEqual(len(self.iris), len(model.labels)) def test_predict_sparse_csc(self): - self.iris.X = csc_matrix(self.iris.X[::20]) + with self.iris.unlocked(): + self.iris.X = csc_matrix(self.iris.X[::20]) c = self.dbscan(self.iris) self.assertEqual(np.ndarray, type(c)) self.assertEqual(len(self.iris), len(c)) def test_predict_spares_csr(self): - self.iris.X = csr_matrix(self.iris.X[::20]) + with self.iris.unlocked(): + self.iris.X = csr_matrix(self.iris.X[::20]) c = self.dbscan(self.iris) self.assertEqual(np.ndarray, type(c)) self.assertEqual(len(self.iris), len(c)) diff --git a/Orange/tests/test_clustering_kmeans.py b/Orange/tests/test_clustering_kmeans.py index 7ff40d94992..1ab9043e964 100644 --- a/Orange/tests/test_clustering_kmeans.py +++ b/Orange/tests/test_clustering_kmeans.py @@ -44,13 +44,15 @@ def test_predict_numpy(self): self.assertEqual(len(self.iris), len(c.labels)) def test_predict_sparse_csc(self): - self.iris.X = csc_matrix(self.iris.X[::20]) + with self.iris.unlocked(): + self.iris.X = csc_matrix(self.iris.X[::20]) c = self.kmeans(self.iris) self.assertEqual(np.ndarray, type(c)) self.assertEqual(len(self.iris), len(c)) def test_predict_spares_csr(self): - self.iris.X = csr_matrix(self.iris.X[::20]) + with self.iris.unlocked(): + self.iris.X = csr_matrix(self.iris.X[::20]) c = self.kmeans(self.iris) self.assertEqual(np.ndarray, type(c)) self.assertEqual(len(self.iris), len(c)) diff 
--git a/Orange/tests/test_clustering_louvain.py b/Orange/tests/test_clustering_louvain.py index a65ba4a8edf..7c6f3dd6b1c 100644 --- a/Orange/tests/test_clustering_louvain.py +++ b/Orange/tests/test_clustering_louvain.py @@ -44,13 +44,15 @@ def test_predict_numpy(self): self.assertEqual(len(self.iris), len(c.labels)) def test_predict_sparse_csc(self): - self.iris.X = csc_matrix(self.iris.X[::5]) + with self.iris.unlocked(): + self.iris.X = csc_matrix(self.iris.X[::5]) c = self.louvain(self.iris) self.assertEqual(np.ndarray, type(c)) self.assertEqual(len(self.iris), len(c)) - def test_predict_spares_csr(self): - self.iris.X = csr_matrix(self.iris.X[::5]) + def test_predict_sparse_csr(self): + with self.iris.unlocked(): + self.iris.X = csr_matrix(self.iris.X[::5]) c = self.louvain(self.iris) self.assertEqual(np.ndarray, type(c)) self.assertEqual(len(self.iris), len(c)) diff --git a/Orange/tests/test_contingency.py b/Orange/tests/test_contingency.py index f866de16899..2435d31900f 100644 --- a/Orange/tests/test_contingency.py +++ b/Orange/tests/test_contingency.py @@ -48,8 +48,9 @@ def test_discrete(self): def test_discrete_missing(self): d = data.Table("zoo") - d.Y[25] = float("nan") - d[0][0] = float("nan") + with d.unlocked(): + d.Y[25] = float("nan") + d[0][0] = float("nan") cont = contingency.Discrete(d, 0) assert_dist_equal(cont["amphibian"], [3, 0]) assert_dist_equal(cont, [[3, 0], [20, 0], [13, 0], [4, 4], @@ -60,8 +61,9 @@ def test_discrete_missing(self): [1, 0]) d = data.Table("zoo") - d.Y[2] = float("nan") - d[2]["predator"] = float("nan") + with d.unlocked(): + d.Y[2] = float("nan") + d[2]["predator"] = float("nan") cont = contingency.Discrete(d, "predator") assert_dist_equal(cont["fish"], [4, 8]) assert_dist_equal(cont, [[1, 3], [11, 9], [4, 8], [7, 1], @@ -73,10 +75,11 @@ def test_discrete_missing(self): def test_array_with_unknowns(self): d = data.Table("zoo") - d.Y[2] = float("nan") - d.Y[6] = float("nan") - d[2]["predator"] = float("nan") - d[4]["predator"] = float("nan") + with d.unlocked(): + d.Y[2] = float("nan") + d.Y[6] = float("nan") + d[2]["predator"] = float("nan") + d[4]["predator"] = float("nan") cont = contingency.Discrete(d, "predator") assert_dist_equal(cont.array_with_unknowns, [[1, 3, 0], [11, 9, 0], [4, 8, 0], [7, 1, 0], @@ -84,10 +87,11 @@ def test_array_with_unknowns(self): def test_discrete_with_fallback(self): d = data.Table("zoo") - d.Y[25] = None - d.Y[24] = None - d.X[0, 0] = None - d.X[24, 0] = None + with d.unlocked(): + d.Y[25] = None + d.Y[24] = None + d.X[0, 0] = None + d.X[24, 0] = None default = contingency.Discrete(d, 0) d._compute_contingency = Mock(side_effect=NotImplementedError) @@ -123,7 +127,8 @@ def test_continuous(self): def test_continuous_missing(self): d = data.Table("iris") - d[1][1] = float("nan") + with d.unlocked(): + d[1][1] = float("nan") cont = contingency.Continuous(d, "sepal width") correct = [[2.3, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0, 4.1, 4.2, 4.4], @@ -133,7 +138,8 @@ def test_continuous_missing(self): np.testing.assert_almost_equal(cont["Iris-setosa"], correct) self.assertEqual(cont.unknowns, 0) - d.Y[0] = float("nan") + with d.unlocked(): + d.Y[0] = float("nan") cont = contingency.Continuous(d, "sepal width") correct = [[2.2, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.6, 3.8], [1, 4, 2, 4, 8, 2, 12, 4, 5, 3, 2, 1, 2]] @@ -146,7 +152,8 @@ def test_continuous_missing(self): 0., 0., 0., 0., 0., 0., 0.]) self.assertEqual(cont.unknowns, 0) - d.Y[1] = float("nan") + with d.unlocked(): + d.Y[1] = 
float("nan") cont = contingency.Continuous(d, "sepal width") np.testing.assert_almost_equal(cont.col_unknowns, [0, 0, 0]) np.testing.assert_almost_equal( @@ -156,7 +163,8 @@ def test_continuous_missing(self): self.assertEqual(cont.unknowns, 1) # this one was failing before since the issue in _contingecy.pyx - d.Y[:50] = np.zeros(50) * float("nan") + with d.unlocked(): + d.Y[:50] = np.zeros(50) * float("nan") cont = contingency.Continuous(d, "sepal width") np.testing.assert_almost_equal(cont.col_unknowns, [0, 0, 0]) np.testing.assert_almost_equal( @@ -171,7 +179,8 @@ def test_continuous_array_with_unknowns(): Test array_with_unknowns function """ d = data.Table("iris") - d.Y[:50] = np.zeros(50) * float("nan") + with d.unlocked(): + d.Y[:50] = np.zeros(50) * float("nan") cont = contingency.Continuous(d, "sepal width") correct_row_unknowns = [0., 0., 1., 0., 0., 0., 0., 0., 1., 6., 5., 5., 2., 9., 6., 2., 3., 4., 2., 1., 1., 1., 1.] @@ -200,8 +209,9 @@ def test_mixedtype_metas(self): cont = contingency.get_contingency(zoo, 2, t.domain.metas[1]) assert_dist_equal(cont["1"], [38, 5]) assert_dist_equal(cont, [[4, 54], [38, 5]]) - zoo[25][t.domain.metas[1]] = float("nan") - zoo[0][2] = float("nan") + with zoo.unlocked(): + zoo[25][t.domain.metas[1]] = float("nan") + zoo[0][2] = float("nan") cont = contingency.get_contingency(zoo, 2, t.domain.metas[1]) assert_dist_equal(cont["1"], [37, 5]) assert_dist_equal(cont, [[4, 53], [37, 5]]) @@ -235,6 +245,7 @@ def _construct_sparse(): 2, 5, 6, 13] indptr = [0, 11, 20, 23, 23, 27] X = sp.csr_matrix((sdata, indices, indptr), shape=(5, 20)) + X.data = X.data.copy() # make it the owner of it's data Y = np.array([[1, 2, 1, 0, 0]]).T return data.Table.from_numpy(domain, X, Y) @@ -255,7 +266,8 @@ def test_sparse(self): assert_dist_equal(cont["b"], [[1], [1]]) assert_dist_equal(cont[2], [[], []]) - d[4].set_class(1) + with d.unlocked(): + d[4].set_class(1) cont = contingency.Continuous(d, 13) assert_dist_equal(cont[0], [[], []]) assert_dist_equal(cont["b"], [[1, 1.1], [1, 1]]) @@ -333,9 +345,10 @@ def test_compute_contingency_row_attribute_sparse(self): Testing with sparse row variable since currently we do not test the situation when a row variable is sparse. 
""" - d = self.test9 # make X sparse - d.X = csr_matrix(d.X) + d = self.test9.copy() + with d.unlocked(): + d.X = csr_matrix(d.X) var1, var2 = d.domain[0], d.domain[1] cont = contingency.Discrete(d, var1, var2) assert_dist_equal(cont, [[1, 0], [1, 0], [1, 0], [1, 0], @@ -344,7 +357,9 @@ def test_compute_contingency_row_attribute_sparse(self): assert_dist_equal(cont, [[1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1, 1]]) - d.X = csc_matrix(d.X) + d = self.test9.copy() + with d.unlocked(): + d.X = csc_matrix(d.X) cont = contingency.Discrete(d, var1, var2) assert_dist_equal(cont, [[1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 1], [0, 1], [0, 1]]) @@ -365,7 +380,8 @@ def test_compute_contingency_invalid(self): c = contingency.get_contingency(d, X, C) self.assertEqual(c.counts.shape[0], 1024) - d.Y[5] = 1024 + with d.unlocked(): + d.Y[5] = 1024 with self.assertRaises(IndexError): contingency.get_contingency(d, X, C) diff --git a/Orange/tests/test_discretize.py b/Orange/tests/test_discretize.py index 119ac8c86dd..480cf179e6c 100644 --- a/Orange/tests/test_discretize.py +++ b/Orange/tests/test_discretize.py @@ -229,7 +229,8 @@ def test_transform(self): def test_remove_constant(self): table = data.Table('iris') - table[:, 0] = 1 + with table.unlocked(): + table[:, 0] = 1 discretize = Discretize(remove_const=True) new_table = discretize(table) self.assertNotEqual(len(table.domain.attributes), @@ -237,7 +238,8 @@ def test_remove_constant(self): def test_keep_constant(self): table = data.Table('iris') - table[:, 0] = 1 + with table.unlocked(): + table[:, 0] = 1 discretize = Discretize(remove_const=False) new_table = discretize(table) self.assertEqual(len(table.domain.attributes), diff --git a/Orange/tests/test_distribution.py b/Orange/tests/test_distribution.py index 1242999dc37..f0be73bf636 100644 --- a/Orange/tests/test_distribution.py +++ b/Orange/tests/test_distribution.py @@ -99,8 +99,9 @@ def test_fallback(self): def test_fallback_with_weights_and_nan(self): d = data.Table("zoo") - d.set_weights(np.random.uniform(0., 1., size=len(d))) - d.Y[::10] = np.nan + with d.unlocked(): + d.set_weights(np.random.uniform(0., 1., size=len(d))) + d.Y[::10] = np.nan default = distribution.Discrete(d, "type") d._compute_distributions = Mock(side_effect=NotImplementedError) @@ -206,7 +207,8 @@ def test_min_max(self): def test_array_with_unknowns(self): d = data.Table("zoo") - d.Y[0] = np.nan + with d.unlocked(): + d.Y[0] = np.nan disc = distribution.Discrete(d, "type") self.assertIsInstance(disc, np.ndarray) self.assertEqual(disc.unknowns, 1) @@ -473,7 +475,8 @@ def assert_dist_and_unknowns(computed, goal_dist): assert_dist_and_unknowns(ddist[18], [[0, 2], [4, 1]]) assert_dist_and_unknowns(ddist[19], zeros) - d.set_weights(np.array([1, 2, 3, 4, 5])) + with d.unlocked(): + d.set_weights(np.array([1, 2, 3, 4, 5])) ddist = distribution.get_distributions(d) self.assertEqual(len(ddist), 20) @@ -508,7 +511,9 @@ def test_compute_distributions_metas(self): # repeat with nan values assert d.metas.dtype.kind == "O" assert d.metas[0, 1] == 0 - d.metas[0, 1] = np.nan + + with d.unlocked(): + d.metas[0, 1] = np.nan dist, nanc = d._compute_distributions([variable])[0] assert_dist_equal(dist, [2, 3, 2]) self.assertEqual(nanc, 1) diff --git a/Orange/tests/test_evaluation_scoring.py b/Orange/tests/test_evaluation_scoring.py index bd89504a747..0fe8950c793 100644 --- a/Orange/tests/test_evaluation_scoring.py +++ b/Orange/tests/test_evaluation_scoring.py @@ -242,7 +242,7 @@ def test_call(self): def test_bayes(self): x = 
np.random.randint(2, size=(100, 5)) col = np.random.randint(5) - y = x[:, col].copy().reshape(100, 1) + y = x[:, col].reshape(100, 1).copy() t = Table.from_numpy(None, x, y) t = Discretize( method=discretize.EqualWidth(n=3))(t) @@ -250,7 +250,9 @@ def test_bayes(self): res = TestOnTrainingData()(t, [nb]) np.testing.assert_almost_equal(CA(res), [1]) - t.Y[-20:] = 1 - t.Y[-20:] + t = Table.from_numpy(None, t.X, t.Y.copy()) + with t.unlocked(): + t.Y[-20:] = 1 - t.Y[-20:] res = TestOnTrainingData()(t, [nb]) self.assertGreaterEqual(CA(res)[0], 0.75) self.assertLess(CA(res)[0], 1) diff --git a/Orange/tests/test_evaluation_testing.py b/Orange/tests/test_evaluation_testing.py index 561be10c6aa..6c88e4bffbe 100644 --- a/Orange/tests/test_evaluation_testing.py +++ b/Orange/tests/test_evaluation_testing.py @@ -46,7 +46,9 @@ def setUpClass(cls): cls.iris = Table('iris') cls.nrows = 200 cls.ncols = 5 - cls.random_table = random_data(cls.nrows, cls.ncols) + + def setUp(self): + self.random_table = random_data(self.nrows, self.ncols) def run_test_failed(self, method, succ_calls): # Can't use mocking helpers here (wrong result type for Majority, @@ -258,7 +260,8 @@ def test_miss_majority(): res = cv(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) - x[-4:] = np.zeros((4, 3)) + with data.unlocked(data.X): + x[-4:] = np.zeros((4, 3)) res = cv(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) @@ -335,7 +338,8 @@ def add_meta_fold(data, f): ndata = data.transform(domain) vals = np.tile(range(f), len(data)//f + 1)[:len(data)] vals = vals.reshape((-1, 1)) - ndata[:, fat] = vals + with ndata.unlocked(ndata.metas): + ndata[:, fat] = vals return ndata def test_init(self): @@ -358,7 +362,8 @@ def test_unknown(self): t = self.random_table t = self.add_meta_fold(t, 3) fat = t.domain.metas[0] - t[0][fat] = float("nan") + with t.unlocked(t.metas): + t[0][fat] = float("nan") res = CrossValidationFeature(feature=fat)(t, [NaiveBayesLearner()]) self.assertNotIn(0, res.row_indices) @@ -440,11 +445,13 @@ def test_miss_majority(): res = LeaveOneOut()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) - x[49] = 0 + with data.unlocked(data.X): + x[49] = 0 res = LeaveOneOut()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) - x[25:] = 1 + with data.unlocked(data.X): + x[25:] = 1 data = Table.from_numpy(None, x, y) res = LeaveOneOut()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0], @@ -516,11 +523,13 @@ def test_miss_majority(): res = TestOnTrainingData()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) - x[49] = 0 + with data.unlocked(data.X): + x[49] = 0 res = TestOnTrainingData()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) - x[25:] = 1 + with data.unlocked(data.X): + x[25:] = 1 data = Table.from_numpy(None, x, y) res = TestOnTrainingData()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0], res.predicted[0][0]) @@ -604,11 +613,13 @@ def test_miss_majority(): res = TestOnTrainingData()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) - x[49] = 0 + with data.unlocked(data.X): + x[49] = 0 res = TestOnTrainingData()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) - x[25:] = 1 + with data.unlocked(data.X): + x[25:] = 1 y = x[:, -1] data = Table.from_numpy(None, x, y) res = TestOnTrainingData()(data, [MajorityLearner()]) diff --git a/Orange/tests/test_filter.py 
b/Orange/tests/test_filter.py index 325b4ae6d3a..b8cdd43957d 100644 --- a/Orange/tests/test_filter.py +++ b/Orange/tests/test_filter.py @@ -356,10 +356,12 @@ def test_operators(self): flt = FilterString("name", FilterString.IsDefined) self.assertTrue(flt(self.inst)) for s in ["?", "nan"]: - self.inst["name"] = s + with self.data.unlocked(): + self.inst["name"] = s flt = FilterString("name", FilterString.IsDefined) self.assertTrue(flt(self.inst)) - self.inst["name"] = "" + with self.data.unlocked(): + self.inst["name"] = "" flt = FilterString("name", FilterString.IsDefined) self.assertFalse(flt(self.inst)) diff --git a/Orange/tests/test_freeviz.py b/Orange/tests/test_freeviz.py index 4e07119359b..5504e76c0fa 100644 --- a/Orange/tests/test_freeviz.py +++ b/Orange/tests/test_freeviz.py @@ -18,7 +18,8 @@ def setUpClass(cls): def test_basic(self): table = self.iris.copy() - table[3, 3] = np.nan + with table.unlocked(): + table[3, 3] = np.nan freeviz = FreeViz() model = freeviz(table) proj = model(table) diff --git a/Orange/tests/test_impute.py b/Orange/tests/test_impute.py index 3b3ec9be3fa..74e328780b0 100644 --- a/Orange/tests/test_impute.py +++ b/Orange/tests/test_impute.py @@ -208,7 +208,8 @@ def test_sparse(self): """ table = self._create_table() domain = table.domain - table.X = sp.csr_matrix(table.X) + with table.unlocked(): + table.X = sp.csr_matrix(table.X) v1, v2 = impute.AsValue()(table, domain[1]) self.assertTrue(np.all(np.isfinite(v2.compute_value(table)))) diff --git a/Orange/tests/test_majority.py b/Orange/tests/test_majority.py index 4b41acff84c..484f7c3240b 100644 --- a/Orange/tests/test_majority.py +++ b/Orange/tests/test_majority.py @@ -49,14 +49,17 @@ def test_empty(self): def test_missing(self): iris = Table('iris') learn = MajorityLearner() - for e in iris[: len(iris) // 2: 2]: - e.set_class("?") + sub_table = iris[: len(iris) // 2: 2] + with sub_table.unlocked(): + for e in sub_table: + e.set_class("?") clf = learn(iris) y = clf(iris) self.assertTrue((y == 2).all()) - for e in iris: - e.set_class("?") + with iris.unlocked(): + for e in iris: + e.set_class("?") clf = learn(iris) y = clf(iris) self.assertEqual(y.all(), 1) diff --git a/Orange/tests/test_normalize.py b/Orange/tests/test_normalize.py index d58e9daae9f..c35f98acfde 100644 --- a/Orange/tests/test_normalize.py +++ b/Orange/tests/test_normalize.py @@ -115,11 +115,12 @@ def test_normalize_sparse(self): self.assertEqual((normalized.X != solution).nnz, 0) # raise error for non-zero offsets - data.X = sp.csr_matrix(np.array([ - [0, 0, 0], - [0, 1, 3], - [0, 2, 4], - ])) + with data.unlocked(): + data.X = sp.csr_matrix(np.array([ + [0, 0, 0], + [0, 1, 3], + [0, 2, 4], + ])) with self.assertRaises(ValueError): normalizer(data) diff --git a/Orange/tests/test_preprocess.py b/Orange/tests/test_preprocess.py index b9076894ec7..99fd76a4ad9 100644 --- a/Orange/tests/test_preprocess.py +++ b/Orange/tests/test_preprocess.py @@ -69,7 +69,8 @@ def test_nothing_to_remove(self): class TestRemoveNaNRows(unittest.TestCase): def test_remove_row(self): data = Table("iris") - data.X[0, 0] = np.nan + with data.unlocked(): + data.X[0, 0] = np.nan pp_data = RemoveNaNRows()(data) self.assertEqual(len(pp_data), len(data) - 1) self.assertFalse(np.isnan(pp_data.X).any()) @@ -78,21 +79,24 @@ def test_remove_row(self): class TestRemoveNaNColumns(unittest.TestCase): def test_column_filtering(self): data = Table("iris") - data.X[:, (1, 3)] = np.NaN + with data.unlocked(): + data.X[:, (1, 3)] = np.NaN new_data = RemoveNaNColumns()(data) 
self.assertEqual(len(new_data.domain.attributes), len(data.domain.attributes) - 2) data = Table("iris") - data.X[0, 0] = np.NaN + with data.unlocked(): + data.X[0, 0] = np.NaN new_data = RemoveNaNColumns()(data) self.assertEqual(len(new_data.domain.attributes), len(data.domain.attributes)) def test_column_filtering_sparse(self): data = Table("iris") - data.X = csr_matrix(data.X) + with data.unlocked(): + data.X = csr_matrix(data.X) new_data = RemoveNaNColumns()(data) self.assertEqual(data, new_data) @@ -169,7 +173,8 @@ def test_dense_pps(self): np.testing.assert_array_equal(out, true_out) def test_sparse_pps(self): - self.data.X = csr_matrix(self.data.X) + with self.data.unlocked(): + self.data.X = csr_matrix(self.data.X) out = AdaptiveNormalize()(self.data) true_out = Scale(center=Scale.NoCentering, scale=Scale.Span)(self.data) np.testing.assert_array_equal(out, true_out) @@ -183,9 +188,11 @@ def setUp(self): self.data = Table.from_numpy(domain, np.zeros((3, 2))) def test_0_dense(self): - self.data[1:, 1] = 7 - true_out = self.data[:, 1] - true_out.X = true_out.X.reshape(-1, 1) + with self.data.unlocked(): + self.data[1:, 1] = 7 + true_out = self.data[:, 1].copy() + with true_out.unlocked(true_out.X): + true_out.X = true_out.X.reshape(-1, 1) out = RemoveSparse(0.5, True)(self.data) np.testing.assert_array_equal(out, true_out) @@ -193,10 +200,12 @@ def test_0_dense(self): np.testing.assert_array_equal(out, true_out) def test_0_sparse(self): - self.data[1:, 1] = 7 - true_out = self.data[:, 1] - self.data.X = csr_matrix(self.data.X) - true_out.X = csr_matrix(true_out.X) + with self.data.unlocked(): + self.data[1:, 1] = 7 + true_out = self.data[:, 1].copy() + self.data.X = csr_matrix(self.data.X) + with true_out.unlocked(true_out.X): + true_out.X = csr_matrix(true_out.X) out = RemoveSparse(0.5, True)(self.data).X np.testing.assert_array_equal(out, true_out) @@ -204,10 +213,12 @@ def test_0_sparse(self): np.testing.assert_array_equal(out, true_out) def test_nan_dense(self): - self.data[1:, 1] = np.nan - self.data.X[:, 0] = 7 - true_out = self.data[:, 0] - true_out.X = true_out.X.reshape(-1, 1) + with self.data.unlocked(): + self.data[1:, 1] = np.nan + self.data.X[:, 0] = 7 + true_out = self.data[:, 0].copy() + with true_out.unlocked(true_out.X): + true_out.X = true_out.X.reshape(-1, 1) out = RemoveSparse(0.5, False)(self.data) np.testing.assert_array_equal(out, true_out) @@ -215,12 +226,14 @@ def test_nan_dense(self): np.testing.assert_array_equal(out, true_out) def test_nan_sparse(self): - self.data[1:, 1] = np.nan - self.data.X[:, 0] = 7 - true_out = self.data[:, 0] - true_out.X = true_out.X.reshape(-1, 1) - self.data.X = csr_matrix(self.data.X) - true_out.X = csr_matrix(true_out.X) + with self.data.unlocked(): + self.data[1:, 1] = np.nan + self.data.X[:, 0] = 7 + true_out = self.data[:, 0].copy() + with true_out.unlocked(true_out.X): + true_out.X = true_out.X.reshape(-1, 1) + true_out.X = csr_matrix(true_out.X) + self.data.X = csr_matrix(self.data.X) out = RemoveSparse(0.5, False)(self.data) np.testing.assert_array_equal(out, true_out) diff --git a/Orange/tests/test_radviz.py b/Orange/tests/test_radviz.py index 27e817f9ee6..7bd658fcbb1 100644 --- a/Orange/tests/test_radviz.py +++ b/Orange/tests/test_radviz.py @@ -11,7 +11,8 @@ class TestRadViz(unittest.TestCase): @classmethod def setUpClass(cls): cls.iris = Table("iris") - cls.iris[3, 3] = np.nan + with cls.iris.unlocked(): + cls.iris[3, 3] = np.nan cls.titanic = Table("titanic") def test_radviz(self): diff --git 
a/Orange/tests/test_score_feature.py b/Orange/tests/test_score_feature.py index 1e27c872e9a..97a48e0baa2 100644 --- a/Orange/tests/test_score_feature.py +++ b/Orange/tests/test_score_feature.py @@ -114,7 +114,8 @@ def test_relieff(self): # some leeway for randomness in relieff random instance selection self.assertIn('tear_rate', found) # Ensure it doesn't crash on missing target class values - old_breast.Y[0] = np.nan + with old_breast.unlocked(): + old_breast.Y[0] = np.nan weights = ReliefF()(old_breast, None) np.testing.assert_array_equal( diff --git a/Orange/tests/test_softmax_regression.py b/Orange/tests/test_softmax_regression.py index 87c554d68bf..77b40dd45db 100644 --- a/Orange/tests/test_softmax_regression.py +++ b/Orange/tests/test_softmax_regression.py @@ -23,8 +23,9 @@ def test_SoftmaxRegression(self): def test_SoftmaxRegressionPreprocessors(self): table = self.iris.copy() - table.X[:, 2] = table.X[:, 2] * 0.001 - table.X[:, 3] = table.X[:, 3] * 0.001 + with table.unlocked(): + table.X[:, 2] = table.X[:, 2] * 0.001 + table.X[:, 3] = table.X[:, 3] * 0.001 learners = [SoftmaxRegressionLearner(preprocessors=[]), SoftmaxRegressionLearner()] cv = CrossValidation(k=10) diff --git a/Orange/tests/test_sparse_table.py b/Orange/tests/test_sparse_table.py index d4a1dbcfee6..0f6ada90b43 100644 --- a/Orange/tests/test_sparse_table.py +++ b/Orange/tests/test_sparse_table.py @@ -33,13 +33,15 @@ def test_value_assignment(self): def test_str(self): iris = Table('iris') - iris.X, iris.Y = csr_matrix(iris.X), csr_matrix(iris.Y) + with iris.unlocked(): + iris.X, iris.Y = csr_matrix(iris.X), csr_matrix(iris.Y) str(iris) def test_Y_setter_1d(self): iris = Table('iris') assert iris.Y.shape == (150,) - iris.Y = csr_matrix(iris.Y) + with iris.unlocked(): + iris.Y = csr_matrix(iris.Y) # We expect the Y shape to match the X shape, which is (150, 4) in iris self.assertEqual(iris.Y.shape, (150,)) @@ -48,8 +50,9 @@ def test_Y_setter_2d(self): assert iris.Y.shape == (150,) # Convert iris.Y to (150, 1) shape new_y = iris.Y[:, np.newaxis] - iris.Y = np.hstack((new_y, new_y)) - iris.Y = csr_matrix(iris.Y) + with iris.unlocked(): + iris.Y = np.hstack((new_y, new_y)) + iris.Y = csr_matrix(iris.Y) # We expect the Y shape to match the X shape, which is (150, 4) in iris self.assertEqual(iris.Y.shape, (150, 2)) @@ -57,7 +60,8 @@ def test_Y_setter_2d_single_instance(self): iris = Table('iris')[:1] # Convert iris.Y to (1, 1) shape new_y = iris.Y[:, np.newaxis] - iris.Y = np.hstack((new_y, new_y)) - iris.Y = csr_matrix(iris.Y) + with iris.unlocked(): + iris.Y = np.hstack((new_y, new_y)) + iris.Y = csr_matrix(iris.Y) # We expect the Y shape to match the X shape, which is (1, 4) in iris self.assertEqual(iris.Y.shape, (1, 2)) diff --git a/Orange/tests/test_svm.py b/Orange/tests/test_svm.py index 63f459880f8..372c3561286 100644 --- a/Orange/tests/test_svm.py +++ b/Orange/tests/test_svm.py @@ -18,7 +18,8 @@ class TestSVMLearner(unittest.TestCase): @classmethod def setUpClass(cls): cls.data = Table(test_filename('datasets/ionosphere.tab')) - cls.data.shuffle() + with cls.data.unlocked(): + cls.data.shuffle() def test_SVM(self): learn = SVMLearner() diff --git a/Orange/tests/test_table.py b/Orange/tests/test_table.py index db18026d09c..ddfa7ea3a3b 100644 --- a/Orange/tests/test_table.py +++ b/Orange/tests/test_table.py @@ -5,6 +5,7 @@ import os import random import unittest +import warnings from unittest.mock import Mock, MagicMock, patch from itertools import chain from math import isnan @@ -47,8 +48,6 @@ def 
test_filename(self): self.assertTrue(d.__file__.endswith("test2.tab")) # platform dependent def test_indexing(self): - import warnings - with warnings.catch_warnings(): warnings.simplefilter("ignore") d = data.Table("datasets/test2") @@ -111,8 +110,6 @@ def test_indexing(self): self.assertEqual(d[np.int_(0)][np.int_(metae)], "i") def test_indexing_example(self): - import warnings - with warnings.catch_warnings(): warnings.simplefilter("ignore") d = data.Table("datasets/test2") @@ -149,8 +146,6 @@ def test_indexing_example(self): self.assertEqual(e[np.int_(metae)], "i") def test_indexing_assign_value(self): - import warnings - with warnings.catch_warnings(): warnings.simplefilter("ignore") d = data.Table("datasets/test2") @@ -160,48 +155,65 @@ def test_indexing_assign_value(self): metaa = d.domain.index("a") self.assertEqual(d[0, "a"], "A") - d[0, "a"] = "B" + + with d.unlocked(): + d[0, "a"] = "B" self.assertEqual(d[0, "a"], "B") - d[0]["a"] = "A" + with d.unlocked(): + d[0]["a"] = "A" self.assertEqual(d[0, "a"], "A") - d[0, vara] = "B" + with d.unlocked(): + d[0, vara] = "B" self.assertEqual(d[0, "a"], "B") - d[0][vara] = "A" + with d.unlocked(): + d[0][vara] = "A" self.assertEqual(d[0, "a"], "A") - d[0, metaa] = "B" + with d.unlocked(): + d[0, metaa] = "B" self.assertEqual(d[0, "a"], "B") - d[0][metaa] = "A" + with d.unlocked(): + d[0][metaa] = "A" self.assertEqual(d[0, "a"], "A") - d[0, np.int_(metaa)] = "B" + with d.unlocked(): + d[0, np.int_(metaa)] = "B" self.assertEqual(d[0, "a"], "B") - d[0][np.int_(metaa)] = "A" + with d.unlocked(): + d[0][np.int_(metaa)] = "A" self.assertEqual(d[0, "a"], "A") # regular varb = d.domain["b"] self.assertEqual(d[0, "b"], 0) - d[0, "b"] = 42 + with d.unlocked(): + d[0, "b"] = 42 self.assertEqual(d[0, "b"], 42) - d[0]["b"] = 0 + with d.unlocked(): + d[0]["b"] = 0 self.assertEqual(d[0, "b"], 0) - d[0, varb] = 42 + with d.unlocked(): + d[0, varb] = 42 self.assertEqual(d[0, "b"], 42) - d[0][varb] = 0 + with d.unlocked(): + d[0][varb] = 0 self.assertEqual(d[0, "b"], 0) - d[0, 0] = 42 + with d.unlocked(): + d[0, 0] = 42 self.assertEqual(d[0, "b"], 42) - d[0][0] = 0 + with d.unlocked(): + d[0][0] = 0 self.assertEqual(d[0, "b"], 0) - d[0, np.int_(0)] = 42 + with d.unlocked(): + d[0, np.int_(0)] = 42 self.assertEqual(d[0, "b"], 42) - d[0][np.int_(0)] = 0 + with d.unlocked(): + d[0][np.int_(0)] = 0 self.assertEqual(d[0, "b"], 0) def test_indexing_assign_example(self): @@ -209,44 +221,48 @@ def almost_equal_list(s, t): for e, f in zip(s, t): self.assertAlmostEqual(e, f) - import warnings - with warnings.catch_warnings(): warnings.simplefilter("ignore") d = data.Table("datasets/test2") self.assertFalse(isnan(d[0, "a"])) - d[0] = ["3.14", "1", "f"] + with d.unlocked(): + d[0] = ["3.14", "1", "f"] almost_equal_list(d[0].values(), [3.14, "1", "f"]) self.assertTrue(isnan(d[0, "a"])) - d[0] = [3.15, 1, "t"] + + with d.unlocked(): + d[0] = [3.15, 1, "t"] almost_equal_list(d[0].values(), [3.15, "0", "t"]) - d[np.int_(0)] = [3.15, 2, "f"] + + with d.unlocked(): + d[np.int_(0)] = [3.15, 2, "f"] almost_equal_list(d[0].values(), [3.15, 2, "f"]) - with self.assertRaises(ValueError): + with d.unlocked(), self.assertRaises(ValueError): d[0] = ["3.14", "1"] - with self.assertRaises(ValueError): + with d.unlocked(), self.assertRaises(ValueError): d[np.int_(0)] = ["3.14", "1"] ex = data.Instance(d.domain, ["3.16", "1", "f"]) - d[0] = ex + with d.unlocked(): + d[0] = ex almost_equal_list(d[0].values(), [3.16, "1", "f"]) ex = data.Instance(d.domain, ["3.16", 2, "t"]) - 
d[np.int_(0)] = ex + with d.unlocked(): + d[np.int_(0)] = ex almost_equal_list(d[0].values(), [3.16, 2, "t"]) ex = data.Instance(d.domain, ["3.16", "1", "f"]) ex["e"] = "mmmapp" - d[0] = ex + with d.unlocked(): + d[0] = ex almost_equal_list(d[0].values(), [3.16, "1", "f"]) self.assertEqual(d[0, "e"], "mmmapp") def test_slice(self): - import warnings - with warnings.catch_warnings(): warnings.simplefilter("ignore") d = data.Table("datasets/test2") @@ -267,27 +283,27 @@ def test_slice(self): self.assertEqual([e[0] for e in x], [2.26, 3.333, Unknown]) def test_assign_slice_value(self): - import warnings - with warnings.catch_warnings(): warnings.simplefilter("ignore") d = data.Table("datasets/test2") - d[2:5, 0] = 42 + with d.unlocked(): + d[2:5, 0] = 42 self.assertEqual([e[0] for e in d], [0, 1.1, 42, 42, 42, 2.25, 2.26, 3.333, Unknown]) - d[:3, "b"] = 43 + with d.unlocked(): + d[:3, "b"] = 43 self.assertEqual([e[0] for e in d], [43, 43, 43, 42, 42, 2.25, 2.26, 3.333, None]) - d[-2:, d.domain[0]] = 44 + with d.unlocked(): + d[-2:, d.domain[0]] = 44 self.assertEqual([e[0] for e in d], [43, 43, 43, 42, 42, 2.25, 2.26, 44, 44]) - d[2:5, "a"] = "A" + with d.unlocked(): + d[2:5, "a"] = "A" self.assertEqual([e["a"] for e in d], list("ABAAACCDE")) def test_multiple_indices(self): - import warnings - with warnings.catch_warnings(): warnings.simplefilter("ignore") d = data.Table("datasets/test2") @@ -302,29 +318,28 @@ def test_multiple_indices(self): self.assertEqual([e[0] for e in x], [2.22, 2.25, 1.1]) def test_assign_multiple_indices_value(self): - import warnings - with warnings.catch_warnings(): warnings.simplefilter("ignore") d = data.Table("datasets/test2") - d[1:4, "b"] = 42 + with d.unlocked(): + d[1:4, "b"] = 42 self.assertEqual([e[0] for e in d], [0, 42, 42, 42, 2.24, 2.25, 2.26, 3.333, None]) - d[range(5, 2, -1), "b"] = None + with d.unlocked(): + d[range(5, 2, -1), "b"] = None self.assertEqual([e[d.domain[0]] for e in d], [0, 42, 42, None, "?", "", 2.26, 3.333, None]) def test_set_multiple_indices_example(self): - import warnings - with warnings.catch_warnings(): warnings.simplefilter("ignore") d = data.Table("datasets/test2") vals = [e[0] for e in d] - d[[1, 2, 5]] = [42, None, None] + with d.unlocked(): + d[[1, 2, 5]] = [42, None, None] vals[1] = vals[2] = vals[5] = 42 self.assertEqual([e[0] for e in d], vals) @@ -340,16 +355,23 @@ def test_bool(self): def test_checksum(self): d = data.Table("zoo") - d[42, 3] = 0 + with d.unlocked(): + d[42, 3] = 0 crc1 = d.checksum(False) - d[42, 3] = 1 + + with d.unlocked(): + d[42, 3] = 1 crc2 = d.checksum(False) self.assertNotEqual(crc1, crc2) - d[42, 3] = 0 + + with d.unlocked(): + d[42, 3] = 0 crc3 = d.checksum(False) self.assertEqual(crc1, crc3) + _ = d[42, "name"] - d[42, "name"] = "non-animal" + with d.unlocked(): + d[42, "name"] = "non-animal" crc4 = d.checksum(False) self.assertEqual(crc1, crc4) crc4 = d.checksum(True) @@ -363,10 +385,11 @@ def test_total_weight(self): d = data.Table("zoo") self.assertEqual(d.total_weight(), len(d)) - d.set_weights(0) - d[0].weight = 0.1 - d[10].weight = 0.2 - d[-1].weight = 0.3 + with d.unlocked(): + d.set_weights(0) + d[0].weight = 0.1 + d[10].weight = 0.2 + d[-1].weight = 0.3 self.assertAlmostEqual(d.total_weight(), 0.6) def test_has_missing(self): @@ -374,15 +397,18 @@ def test_has_missing(self): self.assertFalse(d.has_missing()) self.assertFalse(d.has_missing_class()) - d[10, 3] = "?" + with d.unlocked(): + d[10, 3] = "?" 
self.assertTrue(d.has_missing()) self.assertFalse(d.has_missing_class()) - d[10].set_class("?") + with d.unlocked(): + d[10].set_class("?") self.assertTrue(d.has_missing()) self.assertTrue(d.has_missing_class()) - d = data.Table("datasets/test3") + with d.unlocked(): + d = data.Table("datasets/test3") self.assertFalse(d.has_missing()) self.assertFalse(d.has_missing_class()) @@ -391,19 +417,22 @@ def test_shuffle(self): crc = d.checksum() names = set(str(x["name"]) for x in d) - d.shuffle() + with d.unlocked(): + d.shuffle() self.assertNotEqual(crc, d.checksum()) self.assertSetEqual(names, set(str(x["name"]) for x in d)) crc2 = d.checksum() x = d[2:10] crcx = x.checksum() - d.shuffle() + with d.unlocked(): + d.shuffle() self.assertNotEqual(crc2, d.checksum()) self.assertEqual(crcx, x.checksum()) crc2 = d.checksum() - x.shuffle() + with x.unlocked(): + x.shuffle() self.assertNotEqual(crcx, x.checksum()) self.assertEqual(crc2, d.checksum()) @@ -443,7 +472,8 @@ def test_copy(self): self.assertTrue(np.all(t.X == copy.X)) self.assertTrue(np.all(t.Y == copy.Y)) self.assertTrue(np.all(t.metas == copy.metas)) - copy[0] = [1, 1, 1, 1, 1, 1, 1, 1] + with copy.unlocked(): + copy[0] = [1, 1, 1, 1] self.assertFalse(np.all(t.X == copy.X)) self.assertFalse(np.all(t.Y == copy.Y)) self.assertFalse(np.all(t.metas == copy.metas)) @@ -461,7 +491,8 @@ def test_copy_sparse(self): self.assertNotEqual(id(t.metas), id(copy.metas)) # ensure that copied sparse arrays do not share data - t.X[0, 0] = 42 + with t.unlocked(): + t.X[0, 0] = 42 self.assertEqual(copy.X[0, 0], 5.1) def test_concatenate(self): @@ -528,7 +559,8 @@ def test_concatenate(self): self.assertEqual(t123.name, "t2") self.assertEqual(t123.attributes, {"a": 42, "c": 43, "b": 45}) - t2.Y = np.atleast_2d(t2.Y).T + with t2.unlocked(t2.Y): + t2.Y = np.atleast_2d(t2.Y).T t12 = data.Table.concatenate((t1, t2)) self.assertEqual(t12.domain, t1.domain) np.testing.assert_almost_equal(t12.X, np.vstack((x1, x2))) @@ -549,7 +581,8 @@ def test_concatenate_exceptions(self): def test_concatenate_sparse(self): iris = Table("iris") - iris.X = sp.csc_matrix(iris.X) + with iris.unlocked(): + iris.X = sp.csc_matrix(iris.X) new = Table.concatenate([iris, iris]) self.assertEqual(len(new), 300) self.assertTrue(sp.issparse(new.X), "Concatenated X is not sparse.") @@ -639,7 +672,8 @@ def test_saveTab(self): os.remove("test-zoo.tab.metadata") d = data.Table("zoo") - d.set_weights(range(len(d))) + with d.unlocked(): + d.set_weights(range(len(d))) d.save("test-zoo-weights.tab") dd = data.Table("test-zoo-weights") try: @@ -669,26 +703,27 @@ def test_save_pickle(self): os.remove("iris.pickle") def test_from_numpy(self): - a = np.arange(20, dtype="d").reshape((4, 5)) + a = np.arange(20, dtype="d").reshape((4, 5)).copy() a[:, -1] = [0, 0, 0, 1] dom = data.Domain([data.ContinuousVariable(x) for x in "abcd"], data.DiscreteVariable("e", values=("no", "yes"))) table = data.Table(dom, a) - for i in range(4): - self.assertEqual(table[i].get_class(), "no" if i < 3 else "yes") - for j in range(5): - self.assertEqual(a[i, j], table[i, j]) - table[i, j] = random.random() - self.assertEqual(a[i, j], table[i, j]) - - with self.assertRaises(IndexError): + with table.unlocked(): + for i in range(4): + self.assertEqual(table[i].get_class(), "no" if i < 3 else "yes") + for j in range(5): + self.assertEqual(a[i, j], table[i, j]) + + with table.unlocked(), self.assertRaises(IndexError): table[0, -5] = 5 def test_filter_is_defined(self): d = data.Table("iris") - d[1, 4] = Unknown + with d.unlocked(): + 
d[1, 4] = Unknown self.assertTrue(isnan(d[1, 4])) - d[140, 0] = Unknown + with d.unlocked(): + d[140, 0] = Unknown e = filter.IsDefined()(d) self.assertEqual(len(e), len(d) - 2) self.assertEqual(e[0], d[0]) @@ -699,9 +734,11 @@ def test_filter_is_defined(self): def test_filter_has_class(self): d = data.Table("iris") - d[1, 4] = Unknown + with d.unlocked(): + d[1, 4] = Unknown self.assertTrue(isnan(d[1, 4])) - d[140, 0] = Unknown + with d.unlocked(): + d[140, 0] = Unknown e = filter.HasClass()(d) self.assertEqual(len(e), len(d) - 1) self.assertEqual(e[0], d[0]) @@ -817,7 +854,8 @@ def test_filter_value_continuous(self): x = filter.Values([f])(d) self.assertEqual(len(x), len(d)) - d[:30, v.petal_length] = Unknown + with d.unlocked(): + d[:30, v.petal_length] = Unknown x = filter.Values([f])(d) self.assertEqual(len(x), len(d) - 30) @@ -893,7 +931,8 @@ def test_valueFilter_discrete(self): f = filter.FilterDiscrete(v.hair, values=None) self.assertEqual(len(filter.Values([f])(d)), len(d)) - d[:5, v.hair] = Unknown + with d.unlocked(): + d[:5, v.hair] = Unknown self.assertEqual(len(filter.Values([f])(d)), len(d) - 5) def test_valueFilter_string_is_defined(self): @@ -979,7 +1018,8 @@ def test_valueFilter_string_case_sens(self): def test_valueFilter_string_case_insens(self): d = data.Table("zoo") - d[d[:, "name"].metas[:, 0] == "girl", "name"] = "GIrl" + with d.unlocked(): + d[d[:, "name"].metas[:, 0] == "girl", "name"] = "GIrl" col = d[:, "name"].metas[:, 0] @@ -1111,12 +1151,14 @@ def test_is_sparse(self): table = data.Table("iris") self.assertFalse(table.is_sparse()) - table.X = sp.csr_matrix(table.X) - self.assertTrue(table.is_sparse()) + with table.unlocked(): + table.X = sp.csr_matrix(table.X) + self.assertTrue(table.is_sparse()) def test_repr_sparse_with_one_row(self): table = data.Table("iris")[:1] - table.X = sp.csr_matrix(table.X) + with table.unlocked(): + table.X = sp.csr_matrix(table.X) repr(table) # make sure repr does not crash def test_inf(self): @@ -1152,11 +1194,13 @@ def setUp(self): def mock_domain(self, with_classes=False, with_metas=False): attributes = self.attributes + class_var = self.class_vars[0] if with_classes else None class_vars = self.class_vars if with_classes else [] metas = self.metas if with_metas else [] variables = attributes + class_vars return MagicMock(data.Domain, attributes=attributes, + class_var=class_var, class_vars=class_vars, metas=metas, variables=variables) @@ -1971,20 +2015,24 @@ def setUp(self): data.Table(self.domain, self.data, self.class_data, self.meta_data) def test_can_assign_values(self): - self.table[0, 0] = 42. + with self.table.unlocked(): + self.table[0, 0] = 42. self.assertAlmostEqual(self.table.X[0, 0], 42.) def test_can_assign_values_to_classes(self): a, _, _ = column_sizes(self.table) - self.table[0, a] = 42. + with self.table.unlocked(): + self.table[0, a] = 42. self.assertAlmostEqual(self.table.Y[0], 42.) def test_can_assign_values_to_metas(self): - self.table[0, -1] = 42. + with self.table.unlocked(): + self.table[0, -1] = 42. self.assertAlmostEqual(self.table.metas[0, 0], 42.) 
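The hunks in these test modules all apply the same idiom: unlock the table, then mutate X, Y or metas in place. A minimal sketch of that idiom for reference, assuming only what the hunks themselves show (unlocked() with no argument unlocks every block; passing an array such as data.unlocked(data.X) unlocks just that block):

    import numpy as np
    from Orange.data import Table

    data = Table("iris")

    # Unlock all blocks of the table before writing ...
    with data.unlocked():
        data.Y[0] = np.nan

    # ... or unlock only the array that is actually modified.
    with data.unlocked(data.X):
        data.X[0, 0] = np.nan

Tables obtained through transform() can share their arrays with the source table, which is why several hunks in this patch switch to transform(domain, copy=True) before unlocking and overwriting the result.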
def test_can_assign_rows_to_rows(self): - self.table[0] = self.table[1] + with self.table.unlocked(): + self.table[0] = self.table[1] np.testing.assert_almost_equal( self.table.X[0], self.table.X[1]) np.testing.assert_almost_equal( @@ -1996,7 +2044,8 @@ def test_can_assign_lists(self): a, _, _ = column_sizes(self.table) new_example = [float(i) for i in range(len(self.attributes + self.class_vars))] - self.table[0] = new_example + with self.table.unlocked(): + self.table[0] = new_example np.testing.assert_almost_equal( self.table.X[0], np.array(new_example[:a])) np.testing.assert_almost_equal( @@ -2007,7 +2056,8 @@ def test_can_assign_np_array(self): new_example = \ np.array([float(i) for i in range(len(self.attributes + self.class_vars))]) - self.table[0] = new_example + with self.table.unlocked(): + self.table[0] = new_example np.testing.assert_almost_equal(self.table.X[0], new_example[:a]) np.testing.assert_almost_equal(self.table.Y[0], new_example[a:]) @@ -2081,17 +2131,19 @@ def test_value_indexing(self): def test_row_assignment(self): new_value = 2. - for i in range(self.nrows): - new_row = [new_value] * len(self.data[i]) - self.table[i] = np.array(new_row) - self.assertEqual(list(self.table[i]), new_row) + with self.table.unlocked(): + for i in range(self.nrows): + new_row = [new_value] * len(self.data[i]) + self.table[i] = np.array(new_row) + self.assertEqual(list(self.table[i]), new_row) def test_value_assignment(self): new_value = 0. - for i in range(self.nrows): - for j in range(len(self.table[i])): - self.table[i, j] = new_value - self.assertEqual(self.table[i, j], new_value) + with self.table.unlocked(): + for i in range(self.nrows): + for j in range(len(self.table[i])): + self.table[i, j] = new_value + self.assertEqual(self.table[i, j], new_value) def test_subclasses(self): from pathlib import Path @@ -2114,15 +2166,18 @@ def test_get_nan_frequency(self): self.assertEqual(table.get_nan_frequency_attribute(), 0) self.assertEqual(table.get_nan_frequency_class(), 0) - table.X[1, 2] = table.X[4, 5] = np.nan + with table.unlocked(): + table.X[1, 2] = table.X[4, 5] = np.nan self.assertEqual(table.get_nan_frequency_attribute(), 2 / table.X.size) self.assertEqual(table.get_nan_frequency_class(), 0) - table.Y[3:6] = np.nan + with table.unlocked(): + table.Y[3:6] = np.nan self.assertEqual(table.get_nan_frequency_attribute(), 2 / table.X.size) self.assertEqual(table.get_nan_frequency_class(), 3 / table.Y.size) - table.X[1, 2] = table.X[4, 5] = 0 + with table.unlocked(): + table.X[1, 2] = table.X[4, 5] = 0 self.assertEqual(table.get_nan_frequency_attribute(), 0) self.assertEqual(table.get_nan_frequency_class(), 3 / table.Y.size) @@ -2139,45 +2194,56 @@ def test_assignment(self): inst = table[2] self.assertIsInstance(inst, data.RowInstance) - inst[1] = 0 + with table.unlocked(): + inst[1] = 0 self.assertEqual(table[2, 1], 0) - inst[1] = 1 + with table.unlocked(): + inst[1] = 1 self.assertEqual(table[2, 1], 1) - inst.set_class("mammal") + with table.unlocked(): + inst.set_class("mammal") self.assertEqual(table[2, len(table.domain.attributes)], "mammal") - inst.set_class("fish") + with table.unlocked(): + inst.set_class("fish") self.assertEqual(table[2, len(table.domain.attributes)], "fish") - inst[-1] = "Foo" + with table.unlocked(): + inst[-1] = "Foo" self.assertEqual(table[2, -1], "Foo") def test_iteration_with_assignment(self): table = data.Table("iris") - for i, row in enumerate(table): - row[0] = i + with table.unlocked(): + for i, row in enumerate(table): + row[0] = i 
np.testing.assert_array_equal(table.X[:, 0], np.arange(len(table))) def test_sparse_assignment(self): X = np.eye(4) - Y = X[2] + Y = X[2].copy() table = data.Table.from_numpy(None, X, Y) row = table[1] self.assertFalse(sp.issparse(row.sparse_x)) self.assertEqual(row[0], 0) self.assertEqual(row[1], 1) - table.X = sp.csr_matrix(table.X) - table._Y = sp.csr_matrix(table._Y) + with table.unlocked(): + table.X = sp.csr_matrix(table.X) + table.Y = sp.csr_matrix(table.Y) sparse_row = table[1] self.assertTrue(sp.issparse(sparse_row.sparse_x)) self.assertEqual(sparse_row[0], 0) self.assertEqual(sparse_row[1], 1) - sparse_row[1] = 0 + + with table.unlocked(): + sparse_row[1] = 0 self.assertEqual(sparse_row[1], 0) self.assertEqual(table.X[1, 1], 0) self.assertEqual(table[2][4], 1) - table[2][4] = 0 + + with table.unlocked(): + table[2][4] = 0 self.assertEqual(table[2][4], 0) diff --git a/Orange/widgets/data/owconcatenate.py b/Orange/widgets/data/owconcatenate.py index 3ecc73681ba..26728b49a92 100644 --- a/Orange/widgets/data/owconcatenate.py +++ b/Orange/widgets/data/owconcatenate.py @@ -219,7 +219,9 @@ def apply(self): if source_var: source_ids = np.array(list(flatten( [i] * len(table) for i, table in enumerate(tables)))).reshape((-1, 1)) - data[:, source_var] = source_ids + parts = [data.Y, data.X, data.metas] + with data.unlocked(parts[self.source_column_role]): + data[:, source_var] = source_ids else: data = None diff --git a/Orange/widgets/data/owcreateinstance.py b/Orange/widgets/data/owcreateinstance.py index f9f5292c426..561ff4cbe2e 100644 --- a/Orange/widgets/data/owcreateinstance.py +++ b/Orange/widgets/data/owcreateinstance.py @@ -630,15 +630,16 @@ def commit(self): def _create_data_from_values(self) -> Table: data = Table.from_domain(self.data.domain, 1) - data.name = "created" - data.X[:] = np.nan - data.Y[:] = np.nan - for i, m in enumerate(self.data.domain.metas): - data.metas[:, i] = "" if m.is_string else np.nan - - values = self._get_values() - for var_name, value in values.items(): - data[:, var_name] = value + with data.unlocked(): + data.name = "created" + data.X[:] = np.nan + data.Y[:] = np.nan + for i, m in enumerate(self.data.domain.metas): + data.metas[:, i] = "" if m.is_string else np.nan + + values = self._get_values() + for var_name, value in values.items(): + data[:, var_name] = value return data def _append_to_data(self, data: Table) -> Table: @@ -650,8 +651,9 @@ def _append_to_data(self, data: Table) -> Table: domain = Domain(data.domain.attributes, data.domain.class_vars, data.domain.metas + (var,)) data = data.transform(domain) - data.metas[: len(self.data), -1] = 0 - data.metas[len(self.data):, -1] = 1 + with data.unlocked(data.metas): + data.metas[: len(self.data), -1] = 0 + data.metas[len(self.data):, -1] = 1 return data def _get_values(self) -> Dict[str, Union[str, float]]: diff --git a/Orange/widgets/data/owfeaturestatistics.py b/Orange/widgets/data/owfeaturestatistics.py index 4e8df794059..275bb811331 100644 --- a/Orange/widgets/data/owfeaturestatistics.py +++ b/Orange/widgets/data/owfeaturestatistics.py @@ -177,11 +177,12 @@ def set_data(self, data): self.domain = domain = data.domain self.target_var = None - self.__attributes = self.__filter_attributes(domain.attributes, self.table.X) - # We disable pylint warning because the `Y` property squeezes vectors, - # while we need a 2d array, which `_Y` provides - self.__class_vars = self.__filter_attributes(domain.class_vars, self.table._Y) # pylint: disable=protected-access - self.__metas = 
self.__filter_attributes(domain.metas, self.table.metas) + self.__attributes = self.__filter_attributes( + domain.attributes, self.table.X) + self.__class_vars = self.__filter_attributes( + domain.class_vars, self.table.Y.reshape((len(self.table.Y), -1))) + self.__metas = self.__filter_attributes( + domain.metas, self.table.metas) self.__attributes_set = set(self.__metas[0]) self.__class_vars_set = set(self.__class_vars[0]) self.__metas_set = set(self.__metas[0]) diff --git a/Orange/widgets/data/owrandomize.py b/Orange/widgets/data/owrandomize.py index 7556d0567e1..d29c40e3654 100644 --- a/Orange/widgets/data/owrandomize.py +++ b/Orange/widgets/data/owrandomize.py @@ -97,8 +97,9 @@ def apply(self): type_ = sum(t for t, p in zip(Randomize.Type, self.parts) if p) randomized = Randomize(type_, rand_seed)(self.data[indices]) data = self.data.copy() - for i, instance in zip(indices, randomized): - data[i] = instance + with data.unlocked(): + for i, instance in zip(indices, randomized): + data[i] = instance self.Outputs.data.send(data) def send_report(self): diff --git a/Orange/widgets/data/tests/test_owaggregatecolumns.py b/Orange/widgets/data/tests/test_owaggregatecolumns.py index 27c53adc784..b9939ada119 100644 --- a/Orange/widgets/data/tests/test_owaggregatecolumns.py +++ b/Orange/widgets/data/tests/test_owaggregatecolumns.py @@ -117,7 +117,8 @@ def test_operations(self): def test_operations_with_nan(self): domain = self.data1.domain self.send_signal(self.widget.Inputs.data, self.data1) - self.data1.X[1, 0] = np.nan + with self.data1.unlocked(): + self.data1.X[1, 0] = np.nan self.widget.variables = [domain[n] for n in "c1 c2 t2".split()] m1, m2 = 4 / 3, 5 / 2 diff --git a/Orange/widgets/data/tests/test_owcontinuize.py b/Orange/widgets/data/tests/test_owcontinuize.py index f717eebfdb5..2eea79e897b 100644 --- a/Orange/widgets/data/tests/test_owcontinuize.py +++ b/Orange/widgets/data/tests/test_owcontinuize.py @@ -51,8 +51,9 @@ def test_one_column_equal_values(self): GH-2144 """ table = Table("iris") - table = table[:, 1] - table[:] = 42.0 + table = table[:, 1].copy() + with table.unlocked(): + table[:] = 42.0 self.send_signal(self.widget.Inputs.data, table) # Normalize.NormalizeBySD self.widget.continuous_treatment = 2 @@ -66,13 +67,16 @@ def test_one_column_nan_values_normalize_sd(self): GH-2144 """ table = Table("iris") - table[:, 2] = np.NaN + with table.unlocked(): + table[:, 2] = np.NaN self.send_signal(self.widget.Inputs.data, table) # Normalize.NormalizeBySD self.widget.continuous_treatment = 2 self.widget.unconditional_commit() + table = Table("iris") - table[1, 2] = np.NaN + with table.unlocked(): + table[1, 2] = np.NaN self.send_signal(self.widget.Inputs.data, table) self.widget.unconditional_commit() @@ -83,13 +87,16 @@ def test_one_column_nan_values_normalize_span(self): GH-2144 """ table = Table("iris") - table[:, 2] = np.NaN + with table.unlocked(): + table[:, 2] = np.NaN self.send_signal(self.widget.Inputs.data, table) # Normalize.NormalizeBySpan self.widget.continuous_treatment = 1 self.widget.unconditional_commit() + table = Table("iris") - table[1, 2] = np.NaN + with table.unlocked(): + table[1, 2] = np.NaN self.send_signal(self.widget.Inputs.data, table) self.widget.unconditional_commit() diff --git a/Orange/widgets/data/tests/test_owcreateinstance.py b/Orange/widgets/data/tests/test_owcreateinstance.py index a0fa57d6d11..e7d26a4fe0f 100644 --- a/Orange/widgets/data/tests/test_owcreateinstance.py +++ b/Orange/widgets/data/tests/test_owcreateinstance.py @@ -64,7 +64,7 @@ def 
test_initialize_buttons(self): self.widget.controls.append_to_data.setChecked(False) self.send_signal(self.widget.Inputs.data, self.data) self.send_signal(self.widget.Inputs.reference, self.data[:1]) - output = self.get_output(self.widget.Outputs.data) + output = self.get_output(self.widget.Outputs.data).copy() buttons = self._get_init_buttons() @@ -78,7 +78,8 @@ def test_initialize_buttons(self): buttons[1].click() # Mean output_mean = self.get_output(self.widget.Outputs.data) - output.X = np.round(np.mean(self.data.X, axis=0), 1).reshape(1, 4) + with output.unlocked(): + output.X = np.round(np.mean(self.data.X, axis=0), 1).reshape(1, 4) self.assert_table_equal(output_mean, output) buttons[2].click() # Random @@ -149,7 +150,8 @@ def test_missing_values(self): def test_missing_values_reference(self): reference = self.data[:1].copy() - reference[:] = np.nan + with reference.unlocked(): + reference[:] = np.nan self.send_signal(self.widget.Inputs.data, self.data) self.send_signal(self.widget.Inputs.reference, reference) output1 = self.get_output(self.widget.Outputs.data) @@ -160,7 +162,8 @@ def test_missing_values_reference(self): def test_saved_workflow(self): data = self.data - data.X[:, 0] = np.nan + with data.unlocked(): + data.X[:, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) buttons = self._get_init_buttons() buttons[2].click() # Random diff --git a/Orange/widgets/data/tests/test_owfeatureconstructor.py b/Orange/widgets/data/tests/test_owfeatureconstructor.py index 7ec9bd147b3..4a3e30508a5 100644 --- a/Orange/widgets/data/tests/test_owfeatureconstructor.py +++ b/Orange/widgets/data/tests/test_owfeatureconstructor.py @@ -306,7 +306,8 @@ def test_invalid_expression_variable(self): iris = Table("iris") f = FeatureFunc("1 / petal_length", [("petal_length", iris.domain["petal length"])]) - iris[0]["petal length"] = 0 + with iris.unlocked(): + iris[0]["petal length"] = 0 f.mask_exceptions = False self.assertRaises(Exception, f, iris) diff --git a/Orange/widgets/data/tests/test_owmergedata.py b/Orange/widgets/data/tests/test_owmergedata.py index 02b4ff37c77..bc82cf5974b 100644 --- a/Orange/widgets/data/tests/test_owmergedata.py +++ b/Orange/widgets/data/tests/test_owmergedata.py @@ -792,7 +792,7 @@ def test_sparse(self): self.assertTrue(sp.issparse(output_sparse.X)) self.assertTrue(output_sparse.is_sparse()) - output_sparse.X = output_sparse.X.toarray() + output_sparse = output_sparse.copy() self.assertTablesEqual(output_dense, output_sparse) def test_commit_on_new_data(self): diff --git a/Orange/widgets/data/tests/test_owneighbors.py b/Orange/widgets/data/tests/test_owneighbors.py index 3249c658a82..b10223209d8 100644 --- a/Orange/widgets/data/tests/test_owneighbors.py +++ b/Orange/widgets/data/tests/test_owneighbors.py @@ -107,7 +107,8 @@ def test_missing_values(self): widget = self.widget data = Table("iris") reference = data[:3] - data.X[0:10, 0] = np.nan + with data.unlocked(): + data.X[0:10, 0] = np.nan self.send_signal(widget.Inputs.data, self.iris) self.send_signal(widget.Inputs.reference, reference) widget.apply_button.button.click() diff --git a/Orange/widgets/data/tests/test_owpaintdata.py b/Orange/widgets/data/tests/test_owpaintdata.py index a8fda41d151..01bbd3fdc2f 100644 --- a/Orange/widgets/data/tests/test_owpaintdata.py +++ b/Orange/widgets/data/tests/test_owpaintdata.py @@ -78,8 +78,9 @@ def test_sparse_data(self): GH-2298 GH-2163 """ - data = Table("iris")[::25] - data.X = sp.csr_matrix(data.X) + data = Table("iris")[::25].copy() + with data.unlocked(): + 
data.X = sp.csr_matrix(data.X) self.send_signal(self.widget.Inputs.data, data) self.assertTrue(self.widget.Warning.sparse_not_supported.is_shown()) self.send_signal(self.widget.Inputs.data, None) diff --git a/Orange/widgets/data/tests/test_owpivot.py b/Orange/widgets/data/tests/test_owpivot.py index f817f21351f..e6492781da6 100644 --- a/Orange/widgets/data/tests/test_owpivot.py +++ b/Orange/widgets/data/tests/test_owpivot.py @@ -345,8 +345,10 @@ def test_group_table_metas(self): Dv("d2", ("a", "b")), Cv("c2")]) X = np.array([[0, 1, 0, 2], [1, 2, np.nan, 3], [0, 3, 1, np.nan]]) table = Table(domain, X).transform( - Domain(domain.attributes[:2], metas=domain.attributes[2:])) - table.metas = table.metas.astype(object) + Domain(domain.attributes[:2], metas=domain.attributes[2:]), + copy=True) + with table.unlocked(): + table.metas = table.metas.astype(object) pivot = Pivot(table, Pivot.Functions, table.domain[-1]) group_tab = pivot.group_table diff --git a/Orange/widgets/data/tests/test_owpreprocess.py b/Orange/widgets/data/tests/test_owpreprocess.py index cfdf1456c40..8499f24ba6a 100644 --- a/Orange/widgets/data/tests/test_owpreprocess.py +++ b/Orange/widgets/data/tests/test_owpreprocess.py @@ -41,7 +41,8 @@ def test_randomize(self): def test_remove_sparse(self): data = Table("iris") idx = int(data.X.shape[0]/10) - data.X[:idx+1, 0] = np.zeros((idx+1,)) + with data.unlocked(): + data.X[:idx+1, 0] = np.zeros((idx+1,)) saved = {"preprocessors": [("orange.preprocess.remove_sparse", {'filter0': True, 'useFixedThreshold': False, 'percThresh':10, 'fixedThresh': 50})]} diff --git a/Orange/widgets/data/tests/test_owrank.py b/Orange/widgets/data/tests/test_owrank.py index c015f313dd3..8af4ac4d936 100644 --- a/Orange/widgets/data/tests/test_owrank.py +++ b/Orange/widgets/data/tests/test_owrank.py @@ -315,7 +315,8 @@ def test_scores_sorting(self): def test_scores_nan_sorting(self): """Check NaNs are sorted last""" data = self.iris.copy() - data.get_column_view('petal length')[0][:] = np.nan + with data.unlocked(): + data.get_column_view('petal length')[0][:] = np.nan self.send_signal(self.widget.Inputs.data, data) self.wait_until_finished() diff --git a/Orange/widgets/data/tests/test_owsave.py b/Orange/widgets/data/tests/test_owsave.py index 2f17a3d186a..144ff76fbdf 100644 --- a/Orange/widgets/data/tests/test_owsave.py +++ b/Orange/widgets/data/tests/test_owsave.py @@ -167,7 +167,8 @@ def test_save_file_checks_can_save(self): widget.writer.write.assert_called() widget.writer.reset_mock() - self.iris.X = sp.csr_matrix(self.iris.X) + with self.iris.unlocked(): + self.iris.X = sp.csr_matrix(self.iris.X) widget.save_file() widget.writer.write.assert_not_called() @@ -239,7 +240,8 @@ def test_sparse_error(self): widget.update_messages() self.assertFalse(err.is_shown()) - widget.data.X = sp.csr_matrix(widget.data.X) + with self.iris.unlocked(): + widget.data.X = sp.csr_matrix(widget.data.X) widget.update_messages() self.assertTrue(err.is_shown()) @@ -264,7 +266,8 @@ def test_valid_filters_for_sparse(self): widget.data = self.iris self.assertEqual(widget.get_filters(), widget.valid_filters()) - widget.data.X = sp.csr_matrix(widget.data.X) + with self.iris.unlocked(): + widget.data.X = sp.csr_matrix(widget.data.X) valid = widget.valid_filters() self.assertNotEqual(widget.get_filters(), {}) # false positive, pylint: disable=no-member @@ -282,7 +285,8 @@ def test_valid_default_filter(self): widget.data = self.iris self.assertIs(widget.filter, widget.default_valid_filter()) - widget.data.X = 
sp.csr_matrix(widget.data.X) + with self.iris.unlocked(): + widget.data.X = sp.csr_matrix(widget.data.X) self.assertTrue( widget.get_filters()[widget.default_valid_filter()] .SUPPORT_SPARSE_DATA) @@ -381,7 +385,8 @@ def test_save_uncompressed(self): widget.auto_save = False spiris = Table("iris") - spiris.X = sp.csr_matrix(spiris.X) + with spiris.unlocked(): + spiris.X = sp.csr_matrix(spiris.X) for selected_filter, writer in widget.get_filters().items(): widget.write = writer diff --git a/Orange/widgets/evaluate/owconfusionmatrix.py b/Orange/widgets/evaluate/owconfusionmatrix.py index 8b539cb30dd..a005771d9a0 100644 --- a/Orange/widgets/evaluate/owconfusionmatrix.py +++ b/Orange/widgets/evaluate/owconfusionmatrix.py @@ -393,8 +393,9 @@ def _prepare_data(self): metas) data = self.data.transform(domain) if extra: - data.metas[:, len(self.data.domain.metas):] = \ - np.hstack(tuple(extra)) + with data.unlocked(data.metas): + data.metas[:, len(self.data.domain.metas):] = \ + np.hstack(tuple(extra)) data.name = learner_name if selected: diff --git a/Orange/widgets/evaluate/owpredictions.py b/Orange/widgets/evaluate/owpredictions.py index 4be4959c081..483954e03fe 100644 --- a/Orange/widgets/evaluate/owpredictions.py +++ b/Orange/widgets/evaluate/owpredictions.py @@ -598,7 +598,8 @@ def _commit_predictions(self): if newcolumns: newcolumns = numpy.hstack( [numpy.atleast_2d(cols) for cols in newcolumns]) - predictions.metas[:, -newcolumns.shape[1]:] = newcolumns + with predictions.unlocked(predictions.metas): + predictions.metas[:, -newcolumns.shape[1]:] = newcolumns index = self.dataview.model().index map_to = self.dataview.model().mapToSource diff --git a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py index ab56c804509..87f1fdc9e17 100644 --- a/Orange/widgets/evaluate/tests/test_owcalibrationplot.py +++ b/Orange/widgets/evaluate/tests/test_owcalibrationplot.py @@ -614,6 +614,7 @@ def test_single_class_folds(self, *_): results = self.lenses_results results.folds = [slice(0, 5), slice(5, 19)] results.models = results.models.repeat(2, axis=0) + results.actual = results.actual.copy() results.actual[:3] = 0 results.probabilities[1, 3:5] = np.nan # after this, model 1 has just negative instances in fold 0 diff --git a/Orange/widgets/evaluate/tests/test_owliftcurve.py b/Orange/widgets/evaluate/tests/test_owliftcurve.py index 9ba1ade2ece..5a730d729da 100644 --- a/Orange/widgets/evaluate/tests/test_owliftcurve.py +++ b/Orange/widgets/evaluate/tests/test_owliftcurve.py @@ -51,6 +51,7 @@ def test_empty_input(self): def test_nan_input(self): res = copy.copy(self.res) + res.actual = res.actual.copy() res.actual[0] = np.nan self.send_signal(self.widget.Inputs.evaluation_results, res) self.assertTrue(self.widget.Error.invalid_results.is_shown()) diff --git a/Orange/widgets/evaluate/tests/test_owpredictions.py b/Orange/widgets/evaluate/tests/test_owpredictions.py index 28b2e15f5ce..52511c0339c 100644 --- a/Orange/widgets/evaluate/tests/test_owpredictions.py +++ b/Orange/widgets/evaluate/tests/test_owpredictions.py @@ -41,7 +41,8 @@ def test_rowCount_from_model(self): def test_nan_target_input(self): data = self.iris[::10].copy() - data.Y[1] = np.nan + with data.unlocked(): + data.Y[1] = np.nan yvec, _ = data.get_column_view(data.domain.class_var) self.send_signal(self.widget.Inputs.data, data) self.send_signal(self.widget.Inputs.predictors, ConstantLearner()(data), 1) @@ -58,7 +59,8 @@ def test_nan_target_input(self): 
self.assertTrue(np.all(~np.isnan(ev_yvec))) self.assertTrue(np.all(~np.isnan(evres.actual))) - data.Y[:] = np.nan + with data.unlocked(): + data.Y[:] = np.nan self.send_signal(self.widget.Inputs.data, data) evres = self.get_output(self.widget.Outputs.evaluation_results) self.assertEqual(len(evres.data), 0) diff --git a/Orange/widgets/evaluate/tests/test_owtestandscore.py b/Orange/widgets/evaluate/tests/test_owtestandscore.py index 1d84d8557f7..7c9506ee832 100644 --- a/Orange/widgets/evaluate/tests/test_owtestandscore.py +++ b/Orange/widgets/evaluate/tests/test_owtestandscore.py @@ -95,9 +95,11 @@ def test_testOnTest_incompatible_domain(self): self.widget.resampling = OWTestAndScore.TestOnTest # test data with the same class (otherwise the widget shows a different error) # and a non-nan X - iris_test = iris.transform(Domain([ContinuousVariable("x")], - class_vars=iris.domain.class_vars)) - iris_test.X[:, 0] = 1 + iris_test = iris.transform( + Domain([ContinuousVariable("x")], class_vars=iris.domain.class_vars), + copy=True) + with iris_test.unlocked(): + iris_test.X[:, 0] = 1 self.send_signal(self.widget.Inputs.test_data, iris_test) self.get_output(self.widget.Outputs.evaluations_results, wait=5000) self.assertTrue(self.widget.Error.test_data_incompatible.is_shown()) @@ -179,8 +181,9 @@ def assertErrorShown(data, is_shown, message): self.assertEqual(is_shown, self.widget.Error.train_data_error.is_shown()) self.assertEqual(message, str(self.widget.Error.train_data_error)) - data = Table("iris")[::30] - data.Y[:] = np.nan + data = Table("iris")[::30].copy() + with data.unlocked(): + data.Y[:] = np.nan iris_empty_x = Table.from_table( Domain([], data.domain.class_var), Table("iris") diff --git a/Orange/widgets/model/tests/test_owlogisticregression.py b/Orange/widgets/model/tests/test_owlogisticregression.py index c8d37c7790e..411108e2e87 100644 --- a/Orange/widgets/model/tests/test_owlogisticregression.py +++ b/Orange/widgets/model/tests/test_owlogisticregression.py @@ -105,7 +105,8 @@ def test_target_with_nan(self): GH-2392 """ table = Table("iris") - table.Y[:5] = np.NaN + with table.unlocked(): + table.Y[:5] = np.NaN self.send_signal("Data", table) coef1 = self.get_output("Coefficients") table = table[5:] diff --git a/Orange/widgets/model/tests/test_owrulesclassification.py b/Orange/widgets/model/tests/test_owrulesclassification.py index bdec6d4fef4..bff6f4f2a1e 100644 --- a/Orange/widgets/model/tests/test_owrulesclassification.py +++ b/Orange/widgets/model/tests/test_owrulesclassification.py @@ -110,7 +110,8 @@ def test_alpha_double_spin_boxes(self): def test_sparse_data(self): data = Table("iris") - data.X = sparse.csr_matrix(data.X) + with data.unlocked(): + data.X = sparse.csr_matrix(data.X) self.assertTrue(sparse.issparse(data.X)) self.send_signal("Data", data) self.widget.apply_button.button.click() diff --git a/Orange/widgets/model/tests/test_owsvm.py b/Orange/widgets/model/tests/test_owsvm.py index aad3424e71d..af84ec49351 100644 --- a/Orange/widgets/model/tests/test_owsvm.py +++ b/Orange/widgets/model/tests/test_owsvm.py @@ -96,6 +96,8 @@ def test_sparse_warning(self): data = Table("iris") self.send_signal("Data", data) self.assertFalse(self.widget.Warning.sparse_data.is_shown()) - data.X = csr_matrix(data.X) + + with data.unlocked(): + data.X = csr_matrix(data.X) self.send_signal("Data", data) self.assertTrue(self.widget.Warning.sparse_data.is_shown()) diff --git a/Orange/widgets/tests/base.py b/Orange/widgets/tests/base.py index c8472bcca41..46208be764a 100644 --- 
a/Orange/widgets/tests/base.py +++ b/Orange/widgets/tests/base.py @@ -831,7 +831,8 @@ def assertFontEqual(self, font1, font2): class AnchorProjectionWidgetTestMixin(ProjectionWidgetTestMixin): def test_embedding_missing_values(self): table = Table("heart_disease") - table.X[0] = np.nan + with table.unlocked(): + table.X[0] = np.nan self.send_signal(self.widget.Inputs.data, table) self.assertFalse(np.all(self.widget.valid_data)) output = self.get_output(ANNOTATED_DATA_SIGNAL_NAME) @@ -842,7 +843,8 @@ def test_embedding_missing_values(self): def test_sparse_data(self, timeout=DEFAULT_TIMEOUT): table = Table("iris") - table.X = sp.csr_matrix(table.X) + with table.unlocked(): + table.X = sp.csr_matrix(table.X) self.assertTrue(sp.issparse(table.X)) self.send_signal(self.widget.Inputs.data, table) self.assertTrue(self.widget.Error.sparse_data.is_shown()) @@ -853,7 +855,8 @@ def test_sparse_data(self, timeout=DEFAULT_TIMEOUT): def test_manual_move(self): data = self.data.copy() - data[1, 0] = np.nan + with data.unlocked(): + data[1, 0] = np.nan nvalid, nsample = len(self.data) - 1, self.widget.SAMPLE_SIZE self.send_signal(self.widget.Inputs.data, data) self.widget.graph.select_by_indices(list(range(0, len(data), 10))) @@ -962,7 +965,8 @@ def data_one_column_vals(cls, value=np.nan): ["", "", "", ""], "ynyn" ))) - table[:, 1] = value + with table.unlocked(): + table[:, 1] = value return table @classmethod diff --git a/Orange/widgets/unsupervised/owhierarchicalclustering.py b/Orange/widgets/unsupervised/owhierarchicalclustering.py index 6d3f8712078..bc97712d55a 100644 --- a/Orange/widgets/unsupervised/owhierarchicalclustering.py +++ b/Orange/widgets/unsupervised/owhierarchicalclustering.py @@ -601,7 +601,8 @@ def commit(self): var_name, values=values + ["Other"]) domain = Orange.data.Domain(attrs, classes, metas + (clust_var,)) data = items.transform(domain) - data.get_column_view(clust_var)[0][:] = c + with data.unlocked(data.metas): + data.get_column_view(clust_var)[0][:] = c if selected_indices: selected_data = data[mask] diff --git a/Orange/widgets/unsupervised/owkmeans.py b/Orange/widgets/unsupervised/owkmeans.py index c135503f5f2..462e7b4c71f 100644 --- a/Orange/widgets/unsupervised/owkmeans.py +++ b/Orange/widgets/unsupervised/owkmeans.py @@ -543,8 +543,9 @@ def send_data(self): new_domain = add_columns(domain, metas=[cluster_var, silhouette_var]) new_table = self.data.transform(new_domain) - new_table.get_column_view(cluster_var)[0][:] = clust_ids - new_table.get_column_view(silhouette_var)[0][:] = scores + with new_table.unlocked(new_table.metas): + new_table.get_column_view(cluster_var)[0][:] = clust_ids + new_table.get_column_view(silhouette_var)[0][:] = scores domain_attributes = set(domain.attributes) centroid_attributes = [ @@ -556,8 +557,12 @@ def send_data(self): centroid_domain = add_columns( Domain(centroid_attributes, [], domain.metas), metas=[cluster_var, silhouette_var]) + # Table is constructed from a copy of centroids: if data is stored in + # the widget, it can be modified, so the widget should preferably + # output a copy. The number of centroids is small, hence copying it is + # cheap.
centroids = Table( - centroid_domain, km.centroids, None, + centroid_domain, km.centroids.copy(), None, np.hstack((np.full((km.k, len(domain.metas)), np.nan), np.arange(km.k).reshape(km.k, 1), clust_scores)) diff --git a/Orange/widgets/unsupervised/owlouvainclustering.py b/Orange/widgets/unsupervised/owlouvainclustering.py index ee3296811b3..9c9b9ee2b12 100644 --- a/Orange/widgets/unsupervised/owlouvainclustering.py +++ b/Orange/widgets/unsupervised/owlouvainclustering.py @@ -386,7 +386,8 @@ def _send_data(self): new_domain = add_columns(domain, metas=[cluster_var]) new_table = self.data.transform(new_domain) - new_table.get_column_view(cluster_var)[0][:] = new_partition + with new_table.unlocked(new_table.metas): + new_table.get_column_view(cluster_var)[0][:] = new_partition self.Outputs.annotated_data.send(new_table) diff --git a/Orange/widgets/unsupervised/owtsne.py b/Orange/widgets/unsupervised/owtsne.py index bc6b8443947..cb228076a0b 100644 --- a/Orange/widgets/unsupervised/owtsne.py +++ b/Orange/widgets/unsupervised/owtsne.py @@ -595,7 +595,8 @@ def _get_projection_data(self): self.data.domain.metas + self._get_projection_variables() ) ) - data.metas[:, -2:] = self.get_embedding() + with data.unlocked(data.metas): + data.metas[:, -2:] = self.get_embedding() if self.tsne_embedding is not None: data.domain = Domain( self.data.domain.attributes, diff --git a/Orange/widgets/unsupervised/tests/test_owdbscan.py b/Orange/widgets/unsupervised/tests/test_owdbscan.py index abbf6fcd986..3186e724681 100644 --- a/Orange/widgets/unsupervised/tests/test_owdbscan.py +++ b/Orange/widgets/unsupervised/tests/test_owdbscan.py @@ -132,7 +132,8 @@ def test_change_metric_idx(self): simulate.combobox_activate_index(cbox, 0) # Euclidean def test_sparse_csr_data(self): - self.iris.X = csr_matrix(self.iris.X) + with self.iris.unlocked(): + self.iris.X = csr_matrix(self.iris.X) w = self.widget @@ -149,7 +150,8 @@ def test_sparse_csr_data(self): self.assertEqual("DBSCAN Core", str(output.domain.metas[1])) def test_sparse_csc_data(self): - self.iris.X = csc_matrix(self.iris.X) + with self.iris.unlocked(): + self.iris.X = csc_matrix(self.iris.X) w = self.widget @@ -226,7 +228,8 @@ def test_data_retain_ids(self): def test_missing_data(self): w = self.widget - self.iris[1:5, 1] = np.nan + with self.iris.unlocked(): + self.iris[1:5, 1] = np.nan self.send_signal(w.Inputs.data, self.iris) output = self.get_output(w.Outputs.annotated_data) self.assertTupleEqual((150, 1), output[:, "Cluster"].metas.shape) @@ -244,7 +247,7 @@ def test_normalize_data(self): clusters = DBSCAN(**kwargs)(data) output = self.get_output(self.widget.Outputs.annotated_data) - output_clusters = output.metas[:, 0] + output_clusters = output.metas[:, 0].copy() output_clusters[np.isnan(output_clusters)] = -1 np.testing.assert_array_equal(output_clusters, clusters) @@ -259,7 +262,7 @@ def test_normalize_data(self): clusters = DBSCAN(**kwargs)(data) output = self.get_output(self.widget.Outputs.annotated_data) - output_clusters = output.metas[:, 0] + output_clusters = output.metas[:, 0].copy() output_clusters[np.isnan(output_clusters)] = -1 np.testing.assert_array_equal(output_clusters, clusters) diff --git a/Orange/widgets/unsupervised/tests/test_owdistances.py b/Orange/widgets/unsupervised/tests/test_owdistances.py index 7935fc73c85..d2b42322f11 100644 --- a/Orange/widgets/unsupervised/tests/test_owdistances.py +++ b/Orange/widgets/unsupervised/tests/test_owdistances.py @@ -18,13 +18,16 @@ class TestDistanceRunner(unittest.TestCase): def 
setUpClass(cls): super().setUpClass() cls.iris = Table("iris")[::5] - cls.iris.X[0, 2] = np.nan - cls.iris.X[1, 3] = np.nan - cls.iris.X[2, 1] = np.nan + with cls.iris.unlocked(): + cls.iris.X[0, 2] = np.nan + cls.iris.X[1, 3] = np.nan + cls.iris.X[2, 1] = np.nan + cls.zoo = Table("zoo")[::5] - cls.zoo.X[0, 2] = np.nan - cls.zoo.X[1, 3] = np.nan - cls.zoo.X[2, 1] = np.nan + with cls.zoo.unlocked(): + cls.zoo.X[0, 2] = np.nan + cls.zoo.X[1, 3] = np.nan + cls.zoo.X[2, 1] = np.nan def test_run(self): state = Mock() @@ -66,13 +69,10 @@ def assertDistMatrixEqual(self, dist1, dist2): class TestOWDistances(WidgetTest): - @classmethod - def setUpClass(cls): - super().setUpClass() - cls.iris = Table("iris")[::5] - cls.titanic = Table("titanic")[::10] - def setUp(self): + super().setUp() + self.iris = Table("iris")[::5] + self.titanic = Table("titanic")[::10] self.widget = self.create_widget(OWDistances) def test_distance_combo(self): @@ -171,14 +171,14 @@ def test_migrates_normalized_dist(self): self.assertFalse(w.normalized_dist) def test_negative_values_bhattacharyya(self): - self.iris.X[0, 0] *= -1 + with self.iris.unlocked(): + self.iris.X[0, 0] *= -1 for self.widget.metric_idx, (_, metric) in enumerate(METRICS): if metric == distance.Bhattacharyya: break self.send_signal(self.widget.Inputs.data, self.iris) self.wait_until_finished() self.assertTrue(self.widget.Error.distances_value_error.is_shown()) - self.iris.X[0, 0] *= -1 def test_limit_mahalanobis(self): def assert_error_shown(): diff --git a/Orange/widgets/unsupervised/tests/test_owkmeans.py b/Orange/widgets/unsupervised/tests/test_owkmeans.py index a77b5f8c01f..9f122d41830 100644 --- a/Orange/widgets/unsupervised/tests/test_owkmeans.py +++ b/Orange/widgets/unsupervised/tests/test_owkmeans.py @@ -490,7 +490,8 @@ def test_do_not_recluster_on_same_data(self): ) # X is different, should cause update table3 = table1.copy() - table3.X[:, 0] = 1 + with table3.unlocked(): + table3.X[:, 0] = 1 with patch.object(self.widget, 'unconditional_commit') as commit: self.send_signal(self.widget.Inputs.data, table1) diff --git a/Orange/widgets/unsupervised/tests/test_owlouvain.py b/Orange/widgets/unsupervised/tests/test_owlouvain.py index 2881d6c9fe1..7a0443de4e9 100644 --- a/Orange/widgets/unsupervised/tests/test_owlouvain.py +++ b/Orange/widgets/unsupervised/tests/test_owlouvain.py @@ -64,7 +64,8 @@ def test_empty_dataset(self): meta = np.array([0] * 5) meta_var = ContinuousVariable(name='meta_var') table = Table.from_domain(domain=Domain([], metas=[meta_var]), n_rows=5) - table.get_column_view(meta_var)[0][:] = meta + with table.unlocked(): + table.get_column_view(meta_var)[0][:] = meta self.send_signal(self.widget.Inputs.data, table) self.commit_and_wait() @@ -89,7 +90,8 @@ def test_do_not_recluster_on_same_data(self): ) # X is different, should cause update table3 = table1.copy() - table3.X[:, 0] = 1 + with table3.unlocked(): + table3.X[:, 0] = 1 with patch.object(self.widget, '_invalidate_output') as commit: self.send_signal(self.widget.Inputs.data, table1) @@ -216,7 +218,8 @@ def test_dense_and_sparse_return_same_result(self): # Randomly set some values to zero dense_data = self.iris mask = random_state.beta(1, 2, size=self.iris.X.shape) > 0.5 - dense_data.X[mask] = 0 + with dense_data.unlocked(): + dense_data.X[mask] = 0 sparse_data = dense_data.to_sparse() def _compute_clustering(data): diff --git a/Orange/widgets/unsupervised/tests/test_owmds.py b/Orange/widgets/unsupervised/tests/test_owmds.py index 104d7129738..8b571cd0573 100644 --- 
a/Orange/widgets/unsupervised/tests/test_owmds.py +++ b/Orange/widgets/unsupervised/tests/test_owmds.py @@ -92,9 +92,10 @@ def combobox_run_through_all(): self.send_signal(self.widget.Inputs.data, None) combobox_run_through_all() - data.X[:, 0] = np.nan - data.Y[:] = np.nan - data.metas[:, 1] = np.nan + with data.unlocked(): + data.X[:, 0] = np.nan + data.Y[:] = np.nan + data.metas[:, 1] = np.nan self.send_signal(self.widget.Inputs.data, data, wait=1000) combobox_run_through_all() diff --git a/Orange/widgets/unsupervised/tests/test_owpca.py b/Orange/widgets/unsupervised/tests/test_owpca.py index 55ced0798d7..e7e1b5cf507 100644 --- a/Orange/widgets/unsupervised/tests/test_owpca.py +++ b/Orange/widgets/unsupervised/tests/test_owpca.py @@ -28,7 +28,8 @@ def test_set_variance100(self): def test_constant_data(self): data = self.iris[::5] - data.X[:, :] = 1.0 + with data.unlocked(): + data.X[:, :] = 1.0 # Ignore the warning: the test checks whether the widget shows # Warning.trivial_components when this happens with np.errstate(invalid="ignore"): @@ -183,7 +184,8 @@ def test_normalized_gives_correct_result(self, prepare_table): # Randomly set some values to zero random_state = check_random_state(42) mask = random_state.beta(1, 2, size=self.iris.X.shape) > 0.5 - self.iris.X[mask] = 0 + with self.iris.unlocked(): + self.iris.X[mask] = 0 data = prepare_table(self.iris) diff --git a/Orange/widgets/unsupervised/tests/test_owsom.py b/Orange/widgets/unsupervised/tests/test_owsom.py index c3666d463a5..0dd161e3b0f 100644 --- a/Orange/widgets/unsupervised/tests/test_owsom.py +++ b/Orange/widgets/unsupervised/tests/test_owsom.py @@ -80,8 +80,9 @@ def test_missing_all_data(self): self.send_signal(widget.Inputs.data, Table("heart_disease")) self.assertTrue(widget.Warning.ignoring_disc_variables.is_shown()) - for i in range(150): - self.iris.X[i, i % 4] = np.nan + with self.iris.unlocked(): + for i in range(150): + self.iris.X[i, i % 4] = np.nan self.send_signal(widget.Inputs.data, self.iris) self.assertTrue(widget.Error.no_defined_rows.is_shown()) @@ -94,7 +95,8 @@ def test_missing_all_data(self): def test_missing_some_data(self): widget = self.widget - self.iris.X[:50, 0] = np.nan + with self.iris.unlocked(): + self.iris.X[:50, 0] = np.nan self.send_signal(widget.Inputs.data, self.iris) self.assertFalse(widget.Error.no_defined_rows.is_shown()) @@ -108,7 +110,8 @@ def test_missing_some_data(self): def test_missing_one_row_data(self): widget = self.widget - self.iris.X[5, 0] = np.nan + with self.iris.unlocked(): + self.iris.X[5, 0] = np.nan self.send_signal(widget.Inputs.data, self.iris) self.assertFalse(widget.Error.no_defined_rows.is_shown()) @@ -120,7 +123,8 @@ def test_missing_one_row_data(self): @_patch_recompute_som def test_sparse_data(self): widget = self.widget - self.iris.X = sp.csc_matrix(self.iris.X) + with self.iris.unlocked(): + self.iris.X = sp.csc_matrix(self.iris.X) # Table.from_table can decide to return dense data with patch.object(Table, "from_table", lambda _, x: x): @@ -386,7 +390,8 @@ def test_pie_charts(self): self.assertEqual(e.y(), y) self.assertEqual(e.r, r / 2) - self.iris.Y[:15] = np.nan + with self.iris.unlocked(): + self.iris.Y[:15] = np.nan self.send_signal(widget.Inputs.data, self.iris) a = widget.elements.childItems()[0] np.testing.assert_equal(a.dist, [0.5, 0, 0, 0.5]) @@ -398,8 +403,9 @@ def test_get_color_column(self): domain = table.domain new_domain = Domain( domain.attributes[3:], domain.class_var, domain.attributes[:3]) - new_table = table.transform(new_domain) - 
new_table.metas = new_table.metas.astype(object) + new_table = table.transform(new_domain, copy=True) + with new_table.unlocked(new_table.metas): + new_table.metas = new_table.metas.astype(object) self.send_signal(widget.Inputs.data, new_table) # discrete attribute @@ -441,15 +447,17 @@ def test_get_color_column(self): # discrete meta with missing values widget.attr_color = domain["gender"] - col = widget.data.get_column_view("gender")[0] - col[:5] = np.nan + with widget.data.unlocked(): + col = widget.data.get_column_view("gender")[0] + col[:5] = np.nan col = col.copy() col[:5] = 2 np.testing.assert_equal(widget._get_color_column(), col) @_patch_recompute_som def test_colored_circles_with_missing_values(self): - self.iris.get_column_view("iris")[0][:5] = np.nan + with self.iris.unlocked(): + self.iris.get_column_view("iris")[0][:5] = np.nan self.send_signal(self.widget.Inputs.data, self.iris) self.assertTrue(self.widget.Warning.missing_colors.is_shown()) diff --git a/Orange/widgets/utils/annotated_data.py b/Orange/widgets/utils/annotated_data.py index fea91dae790..46149c15291 100644 --- a/Orange/widgets/utils/annotated_data.py +++ b/Orange/widgets/utils/annotated_data.py @@ -35,11 +35,14 @@ def _table_with_annotation_column(data, values, column_data, var_name): class_vars, metas = data.domain.class_vars, data.domain.metas if not data.domain.class_vars: class_vars += (var, ) + column_data = column_data.reshape((len(data), )) else: metas += (var, ) + column_data = column_data.reshape((len(data), 1)) domain = Domain(data.domain.attributes, class_vars, metas) table = data.transform(domain) - table[:, var] = column_data.reshape((len(data), 1)) + with table.unlocked(table.Y if not data.domain.class_vars else table.metas): + table[:, var] = column_data return table diff --git a/Orange/widgets/utils/tests/test_owlearnerwidget.py b/Orange/widgets/utils/tests/test_owlearnerwidget.py index e027d4a7acd..374d45d1a2e 100644 --- a/Orange/widgets/utils/tests/test_owlearnerwidget.py +++ b/Orange/widgets/utils/tests/test_owlearnerwidget.py @@ -125,8 +125,9 @@ class WidgetLR(OWBaseLearner): multinomial_treatment=continuize.Continuize.AsOrdinal, transform_class=True, ) - data = self.iris.transform(pp(self.iris)) - data.Y = sp.csr_matrix(data.Y) + data = self.iris.transform(pp(self.iris), copy=True) + with data.unlocked(): + data.Y = sp.csr_matrix(data.Y) self.send_signal(w.Inputs.data, data, widget=w) self.assertFalse(any(w.Error.active)) diff --git a/Orange/widgets/visualize/owlinearprojection.py b/Orange/widgets/visualize/owlinearprojection.py index 8541969d099..c221f0a48b3 100644 --- a/Orange/widgets/visualize/owlinearprojection.py +++ b/Orange/widgets/visualize/owlinearprojection.py @@ -155,8 +155,9 @@ def normalized(a): if v.is_continuous and v is not attr_color], class_vars=attr_color ) - data = self.master.data.transform(domain) - data.X = normalized(data.X) + data = self.master.data.transform(domain, copy=True) + with data.unlocked(): + data.X = normalized(data.X) relief = ReliefF if attr_color.is_discrete else RReliefF weights = relief(n_iterations=100, k_nearest=self.minK)(data) results = sorted(zip(weights, domain.attributes), key=lambda x: (-x[0], x[1].name)) diff --git a/Orange/widgets/visualize/owsilhouetteplot.py b/Orange/widgets/visualize/owsilhouetteplot.py index b999e12aa76..1e9baede1a1 100644 --- a/Orange/widgets/visualize/owsilhouetteplot.py +++ b/Orange/widgets/visualize/owsilhouetteplot.py @@ -503,15 +503,18 @@ def commit(self): domain.attributes, domain.class_vars, domain.metas + 
(silhouette_var, )) - data = self.data.transform(domain) if np.count_nonzero(selectedmask): selected = self.data.from_table( domain, self.data, np.flatnonzero(selectedmask)) if selected is not None: - selected[:, silhouette_var] = np.c_[scores[selectedmask]] - data[:, silhouette_var] = np.c_[scores] + with selected.unlocked(selected.metas): + selected[:, silhouette_var] = np.c_[scores[selectedmask]] + + data = self.data.transform(domain) + with data.unlocked(data.metas): + data[:, silhouette_var] = np.c_[scores] self.Outputs.selected_data.send(selected) self.Outputs.annotated_data.send(create_annotated_table(data, indices)) diff --git a/Orange/widgets/visualize/tests/test_owboxplot.py b/Orange/widgets/visualize/tests/test_owboxplot.py index 4896051acef..21ca8c90dd2 100644 --- a/Orange/widgets/visualize/tests/test_owboxplot.py +++ b/Orange/widgets/visualize/tests/test_owboxplot.py @@ -66,21 +66,24 @@ def test_primitive_metas(self): def test_input_data_missings_cont_group_var(self): """Check widget with continuous data with missing values and group variable""" data = self.iris.copy() - data.X[:, 0] = np.nan + with data.unlocked(): + data.X[:, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) # used to crash, see #1568 def test_input_data_missings_cont_no_group_var(self): """Check widget with continuous data with missing values and no group variable""" data = self.housing - data.X[:, 0] = np.nan + with data.unlocked(): + data.X[:, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) # used to crash, see #1568 def test_input_data_missings_disc_group_var(self): """Check widget with discrete data with missing values and group variable""" data = self.zoo - data.X[:, 1] = np.nan + with data.unlocked(): + data.X[:, 1] = np.nan # This is a test and does it at its own risk: # pylint: disable=protected-access data.domain.attributes[1]._values = [] @@ -93,7 +96,8 @@ def test_input_data_missings_disc_no_group_var(self): """Check widget discrete data with missing values and no group variable""" data = self.zoo data.domain.class_var = ContinuousVariable("cls") - data.X[:, 1] = np.nan + with data.unlocked(): + data.X[:, 1] = np.nan # This is a test and does it at its own risk: # pylint: disable=protected-access data.domain.attributes[1]._values = [] @@ -253,7 +257,8 @@ def test_empty_groups(self): # select rows with US State equal to TX or MO use_indexes = np.array([0, 1, 25, 26, 27]) - table.X = table.X[use_indexes] + with table.unlocked(): + table.X = table.X[use_indexes] self.send_signal(self.widget.Inputs.data, table) self.assertEqual(2, len(self.widget.boxes)) diff --git a/Orange/widgets/visualize/tests/test_owdistributions.py b/Orange/widgets/visualize/tests/test_owdistributions.py index 22eddc30cc2..023b239f2dd 100644 --- a/Orange/widgets/visualize/tests/test_owdistributions.py +++ b/Orange/widgets/visualize/tests/test_owdistributions.py @@ -213,9 +213,10 @@ def test_switch_cvar(self): y = self.iris.domain.class_var extra = DiscreteVariable("foo", values=("a", "b")) domain = Domain(self.iris.domain.attributes + (extra, ), y) - data = self.iris.transform(domain) - data.X[:75, -1] = 0 - data.X[75:120, -1] = 1 + data = self.iris.transform(domain, copy=True) + with data.unlocked(): + data.X[:75, -1] = 0 + data.X[75:120, -1] = 1 self.send_signal(widget.Inputs.data, data) self._set_var(2) self._set_cvar(y) @@ -288,11 +289,12 @@ def test_set_valid_data(self): self.assertIsNotNone(widget.valid_group_data) self.assertTrue(widget.is_valid) - X, Y = self.iris.X, self.iris.Y - X[:, 0] = np.nan 
- X[:50, 1] = np.nan - X[:100, 2] = np.nan - Y[75:] = np.nan + with self.iris.unlocked(): + X, Y = self.iris.X, self.iris.Y + X[:, 0] = np.nan + X[:50, 1] = np.nan + X[:100, 2] = np.nan + Y[75:] = np.nan self.send_signal(widget.Inputs.data, self.iris) self._set_var(domain[0]) diff --git a/Orange/widgets/visualize/tests/test_owheatmap.py b/Orange/widgets/visualize/tests/test_owheatmap.py index c61075f77dd..763b536cfc3 100644 --- a/Orange/widgets/visualize/tests/test_owheatmap.py +++ b/Orange/widgets/visualize/tests/test_owheatmap.py @@ -143,7 +143,8 @@ def test_cluster_column_on_all_zero_column(self): # Pearson distance used for clustering of columns does not # handle all zero columns well iris = Table("iris") - iris[:, 0] = 0 + with iris.unlocked(): + iris[:, 0] = 0 self.widget.col_clustering = True self.widget.set_dataset(iris) @@ -218,7 +219,8 @@ def test_set_split_var(self): def test_set_split_var_missing(self): data = self.brown_selected[::3].copy() - data.Y[::5] = np.nan + with data.unlocked(): + data.Y[::5] = np.nan w = self.widget self.send_signal(self.widget.Inputs.data, data, widget=w) self.assertIs(w.split_by_var, data.domain.class_var) @@ -246,7 +248,8 @@ def test_set_split_column_key(self): def test_set_split_column_key_missing(self): data = self._brown_selected_10() - data.Y[:5] = np.nan + with data.unlocked(): + data.Y[:5] = np.nan data_t = data.transpose(data) function = data.domain["function"] w = self.widget @@ -328,15 +331,17 @@ def test_row_color_annotations(self): def test_row_color_annotations_with_na(self): widget = self.widget - data = self._brown_selected_10() - data.Y[:3] = np.nan - data.metas[:3, -1] = np.nan + data = self._brown_selected_10() + with data.unlocked(): + data.Y[:3] = np.nan + data.metas[:3, -1] = np.nan self.send_signal(widget.Inputs.data, data, widget=widget) widget.set_annotation_color_var(data.domain["function"]) self.assertTrue(widget.scene.widget.right_side_colors[0].isVisible()) widget.set_annotation_color_var(data.domain["diau g"]) - data.Y[:] = np.nan - data.metas[:, -1] = np.nan + with data.unlocked(): + data.Y[:] = np.nan + data.metas[:, -1] = np.nan self.send_signal(widget.Inputs.data, data, widget=widget) widget.set_annotation_color_var(data.domain["function"]) widget.set_annotation_color_var(data.domain["diau g"]) @@ -359,15 +364,17 @@ def test_col_color_annotations(self): def test_col_color_annotations_with_na(self): widget = self.widget data = self._brown_selected_10() - data.Y[:3] = np.nan - data.metas[:3, -1] = np.nan + with data.unlocked(): + data.Y[:3] = np.nan + data.metas[:3, -1] = np.nan data_t = data.transpose(data) self.send_signal(widget.Inputs.data, data_t, widget=widget) widget.set_column_annotation_color_var(data.domain["function"]) self.assertTrue(widget.scene.widget.top_side_colors[0].isVisible()) widget.set_column_annotation_color_var(data.domain["diau g"]) - data.Y[:] = np.nan - data.metas[:, -1] = np.nan + with data.unlocked(): + data.Y[:] = np.nan + data.metas[:, -1] = np.nan data_t = data.transpose(data) self.send_signal(widget.Inputs.data, data_t, widget=widget) widget.set_column_annotation_color_var(data.domain["function"]) diff --git a/Orange/widgets/visualize/tests/test_owlinearprojection.py b/Orange/widgets/visualize/tests/test_owlinearprojection.py index b0617355c61..820739bbaab 100644 --- a/Orange/widgets/visualize/tests/test_owlinearprojection.py +++ b/Orange/widgets/visualize/tests/test_owlinearprojection.py @@ -38,8 +38,9 @@ def test_nan_plot(self): 
simulate.combobox_run_through_all(self.widget.controls.attr_color) simulate.combobox_run_through_all(self.widget.controls.attr_size) - data.X[:, 0] = np.nan - data.Y[:] = np.nan + with data.unlocked(): + data.X[:, 0] = np.nan + data.Y[:] = np.nan self.send_signal(self.widget.Inputs.data, data) self.send_signal(self.widget.Inputs.data_subset, data[2:3]) simulate.combobox_run_through_all(self.widget.controls.attr_color) @@ -108,7 +109,8 @@ def assertErrorShown(data, is_shown): self.assertEqual(is_shown, self.widget.Error.no_valid_data.is_shown()) data = Table("iris")[::30] - data[:, 0] = np.nan + with data.unlocked(): + data[:, 0] = np.nan for data, is_shown in zip([None, data, Table("iris")[:30]], [False, True, False]): assertErrorShown(data, is_shown) diff --git a/Orange/widgets/visualize/tests/test_owlineplot.py b/Orange/widgets/visualize/tests/test_owlineplot.py index 0df9f81f750..2b0ced1e7c7 100644 --- a/Orange/widgets/visualize/tests/test_owlineplot.py +++ b/Orange/widgets/visualize/tests/test_owlineplot.py @@ -120,7 +120,8 @@ def test_select(self): def test_saved_selection(self): data = self.data.copy() - data[0, 0] = np.nan + with data.unlocked(): + data[0, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) mask = np.zeros(len(data) - 1, dtype=bool) mask[::10] = True @@ -185,7 +186,8 @@ def test_max_features(self): def test_data_with_missing_values(self): data = self.data.copy() - data[0, 0] = np.nan + with data.unlocked(): + data[0, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) self.assertTrue(self.widget.Information.hidden_instances.is_shown()) self.send_signal(self.widget.Inputs.data, None) diff --git a/Orange/widgets/visualize/tests/test_owprojectionwidget.py b/Orange/widgets/visualize/tests/test_owprojectionwidget.py index 9a6562b8712..83214a684b1 100644 --- a/Orange/widgets/visualize/tests/test_owprojectionwidget.py +++ b/Orange/widgets/visualize/tests/test_owprojectionwidget.py @@ -155,6 +155,7 @@ def get_embedding(self): if not len(x_data[self.valid_data]): return None + x_data = x_data.copy() x_data[x_data == np.inf] = np.nan x_data_ = np.ones(len(x_data)) y_data = np.ones(len(x_data)) @@ -177,7 +178,8 @@ def setUp(self): def test_annotation_with_nans(self): data = Table.from_table_rows(self.data, [0, 1, 2]) - data.X[1, :] = np.nan + with data.unlocked(): + data.X[1, :] = np.nan self.send_signal(self.widget.Inputs.data, data) points = self.widget.graph.scatterplot_item.points() self.widget.graph.select_by_click(None, [points[1]]) diff --git a/Orange/widgets/visualize/tests/test_owscatterplot.py b/Orange/widgets/visualize/tests/test_owscatterplot.py index c935f63ff16..f1b7f5acde1 100644 --- a/Orange/widgets/visualize/tests/test_owscatterplot.py +++ b/Orange/widgets/visualize/tests/test_owscatterplot.py @@ -96,7 +96,8 @@ def test_error_message(self): """Check if error message appears and then disappears when data is removed from input""" data = self.data.copy() - data.X[:, 0] = np.nan + with data.unlocked(): + data.X[:, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) self.assertTrue(self.widget.Warning.missing_coords.is_shown()) self.send_signal(self.widget.Inputs.data, None) @@ -288,7 +289,8 @@ def test_invalid_points_selection(self): "selection_group": [(i, 1) for i in range(50)]} ) data = self.data.copy()[:11] - data[0, 0] = np.nan + with data.unlocked(): + data[0, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) self.assertIsNone(self.get_output(self.widget.Outputs.selected_data)) @@ -394,12 +396,14 @@ def 
assert_vizrank_enabled(data, is_enabled): data1 = Table("iris")[::30] data2 = Table("iris")[::30] - data2.Y[:] = np.nan + with data2.unlocked(): + data2.Y[:] = np.nan domain = Domain( attributes=data2.domain.attributes[:4], class_vars=DiscreteVariable("iris", values=())) data2 = Table(domain, data2.X, Y=data2.Y) data3 = Table("iris")[::30] - data3.Y[:] = np.nan + with data3.unlocked(): + data3.Y[:] = np.nan for data, is_enabled in zip([data1, data2, data1, data3, data1], [True, False, True, False, True]): @@ -549,9 +553,10 @@ def test_handle_metas(self): class_vars=data.domain.class_vars, metas=data.domain.attributes[2:] ) - data = data.transform(domain) + data = data.transform(domain, copy=True) # Sometimes floats in metas are saved as objects - data.metas = data.metas.astype(object) + with data.unlocked(): + data.metas = data.metas.astype(object) self.send_signal(w.Inputs.data, data) simulate.combobox_activate_item(w.cb_attr_x, data.domain.metas[1].name) simulate.combobox_activate_item(w.controls.attr_color, data.domain.metas[0].name) @@ -596,8 +601,9 @@ def test_metas_zero_column(self): data = Table("iris") domain = data.domain domain = Domain(domain.attributes[:3], domain.class_vars, domain.attributes[3:]) - data = data.transform(domain) - data.metas[:, 0] = 0 + data = data.transform(domain, copy=True) + with data.unlocked(): + data.metas[:, 0] = 0 w = self.widget self.send_signal(w.Inputs.data, data) simulate.combobox_activate_item(w.controls.attr_x, domain.metas[0].name) @@ -667,8 +673,12 @@ def prepare_data(): data = Table("iris") values = list(range(15)) class_var = DiscreteVariable("iris5", values=[str(v) for v in values]) - data = data.transform(Domain(attributes=data.domain.attributes, class_vars=[class_var])) - data.Y = np.array(values * 10, dtype=float) + data = data.transform( + Domain(attributes=data.domain.attributes, + class_vars=[class_var]), + copy=True) + with data.unlocked(): + data.Y = np.array(values * 10, dtype=float) return data def assert_equal(data, max): @@ -679,7 +689,8 @@ def assert_equal(data, max): assert_equal(prepare_data(), MAX_COLORS) # data with nan value data = prepare_data() - data.Y[42] = np.nan + with data.unlocked(): + data.Y[42] = np.nan assert_equal(data, MAX_COLORS + 1) def test_invalidated_same_features(self): @@ -824,7 +835,8 @@ def test_regression_lines_appear(self): simulate.combobox_activate_index(self.widget.controls.attr_color, 0) self.assertEqual(len(self.widget.graph.reg_line_items), 1) data = self.data.copy() - data[:, 0] = np.nan + with data.unlocked(): + data[:, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) self.assertEqual(len(self.widget.graph.reg_line_items), 0) diff --git a/Orange/widgets/visualize/tests/test_owsilhouetteplot.py b/Orange/widgets/visualize/tests/test_owsilhouetteplot.py index 4791a155266..0df13d55b8f 100644 --- a/Orange/widgets/visualize/tests/test_owsilhouetteplot.py +++ b/Orange/widgets/visualize/tests/test_owsilhouetteplot.py @@ -65,7 +65,8 @@ def test_insufficient_clusters(self): def test_unknowns_in_labels(self): data = self.data[[0, 1, 2, 50, 51, 52, 100, 101, 102]] - data.Y[::3] = np.nan + with data.unlocked(data.Y): + data.Y[::3] = np.nan valid = ~np.isnan(data.Y.flatten()) self.send_signal(self.widget.Inputs.data, data) output = self.get_output(ANNOTATED_DATA_SIGNAL_NAME) @@ -87,7 +88,8 @@ def test_nan_distances(self): self.assertEqual(self.widget.Distances[self.widget.distance_idx][0], 'Cosine') data = self.data[[0, 1, 2, 50, 51, 52, 100, 101, 102]] - data.X[::3] = 0 + with 
data.unlocked(data.X): + data.X[::3] = 0 valid = np.any(data.X != 0, axis=1) self.assertFalse(self.widget.Warning.nan_distances.is_shown()) self.send_signal(self.widget.Inputs.data, data) diff --git a/Orange/widgets/visualize/tests/test_owvenndiagram.py b/Orange/widgets/visualize/tests/test_owvenndiagram.py index 9d025291cea..b77bc1d4698 100644 --- a/Orange/widgets/visualize/tests/test_owvenndiagram.py +++ b/Orange/widgets/visualize/tests/test_owvenndiagram.py @@ -37,7 +37,8 @@ def _select_data(self): def test_rows_id(self): data = Table('zoo') data1 = deepcopy(data) - data1[:, 1] = 1 + with data1.unlocked(): + data1[:, 1] = 1 self.widget.rowwise = True self.send_signal(self.signal_name, data1[:10], 1) self.widget.selected_feature = IDENTITY_STR @@ -177,7 +178,7 @@ def test_multiple_input_over_cols(self): selected_atr_name = 'Selected' input2 = self.data.transform(Domain([self.data.domain.attributes[0]], self.data.domain.class_vars, - self.data.domain.metas)) + self.data.domain.metas), copy=True) self.send_signal(self.signal_name, self.data, (1, 'Data', None)) self.send_signal(self.signal_name, input2, (2, 'Data', None)) @@ -200,7 +201,8 @@ def test_multiple_input_over_cols(self): input2.metas) #domain matches but the values do not - input2.X = input2.X - 1 + with input2.unlocked(input2.X): + input2.X = input2.X - 1 self.send_signal(self.signal_name, input2, (2, 'Data', None)) self.widget.vennwidget.vennareas()[3].setSelected(True) annotated = self.get_output(self.widget.Outputs.annotated_data) diff --git a/Orange/widgets/visualize/utils/widget.py b/Orange/widgets/visualize/utils/widget.py index d7094c85700..3e8a2592209 100644 --- a/Orange/widgets/visualize/utils/widget.py +++ b/Orange/widgets/visualize/utils/widget.py @@ -584,7 +584,8 @@ def _get_projection_data(self): data = self.data.transform(Domain(self.data.domain.attributes, self.data.domain.class_vars, self.data.domain.metas + variables)) - data.metas[:, -2:] = self.get_embedding() + with data.unlocked(data.metas): + data.metas[:, -2:] = self.get_embedding() return data def _get_projection_variables(self): @@ -755,7 +756,7 @@ def send_components(self): comp_name = get_unique_names(proposed, 'component') meta_attrs = [StringVariable(name=comp_name)] domain = Domain(self.effective_variables, metas=meta_attrs) - components = Table(domain, self._send_components_x(), + components = Table(domain, self._send_components_x().copy(), metas=self._send_components_metas()) components.name = "components" self.Outputs.components.send(components)
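Note (not part of the patch): the hunks above all apply one convention — tables received on widget inputs or produced by transform() are treated as read-only, so in-place writes are wrapped in Table.unlocked() (optionally scoped to a single array such as X, Y or metas), and tables that genuinely need to be writable are copied first, e.g. via copy() or transform(..., copy=True). The lines below are a minimal illustrative sketch of that convention, using only calls that already appear in the hunks above; it assumes an Orange build in which Table.unlocked() is available.

    import numpy as np
    from Orange.data import Domain, Table

    # copy first, so later edits cannot leak into any shared or cached data
    data = Table("iris").copy()

    with data.unlocked():                  # unlock every array for in-place edits
        data.X[0, 0] = np.nan

    with data.unlocked(data.Y):            # or unlock just one array (Y, X or metas)
        data.Y[:5] = np.nan

    # transform() may return views over the source arrays; request a copy
    # (as the hunks above do with copy=True) before writing into the result
    reduced = data.transform(
        Domain(data.domain.attributes[:2], data.domain.class_vars), copy=True)
    with reduced.unlocked():
        reduced.X[:, 0] = 0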