From d912603a4b41ebe483089fc555e22ce0264b51e5 Mon Sep 17 00:00:00 2001 From: janezd Date: Fri, 16 Apr 2021 20:07:06 +0200 Subject: [PATCH] squash this commit somewhere --- Orange/classification/_tree_scorers.pyx | 51 ++- Orange/classification/tree.py | 2 +- Orange/data/instance.py | 8 +- Orange/data/table.py | 328 ++++++++++++------ Orange/evaluation/testing.py | 3 +- Orange/preprocess/_relieff.pyx | 7 +- Orange/preprocess/preprocess.py | 18 +- Orange/preprocess/transformation.py | 2 +- Orange/statistics/util.py | 3 +- Orange/tests/test_classification.py | 5 +- Orange/tests/test_clustering_dbscan.py | 6 +- Orange/tests/test_clustering_kmeans.py | 6 +- Orange/tests/test_clustering_louvain.py | 8 +- Orange/tests/test_contingency.py | 64 ++-- Orange/tests/test_discretize.py | 6 +- Orange/tests/test_distribution.py | 15 +- Orange/tests/test_evaluation_scoring.py | 6 +- Orange/tests/test_evaluation_testing.py | 31 +- Orange/tests/test_filter.py | 6 +- Orange/tests/test_freeviz.py | 3 +- Orange/tests/test_impute.py | 3 +- Orange/tests/test_majority.py | 11 +- Orange/tests/test_normalize.py | 11 +- Orange/tests/test_preprocess.py | 57 +-- Orange/tests/test_radviz.py | 3 +- Orange/tests/test_score_feature.py | 3 +- Orange/tests/test_softmax_regression.py | 5 +- Orange/tests/test_sparse_table.py | 16 +- Orange/tests/test_svm.py | 3 +- Orange/tests/test_table.py | 279 +++++++++------ Orange/widgets/tests/base.py | 12 +- Orange/widgets/utils/annotated_data.py | 4 +- .../widgets/visualize/owlinearprojection.py | 5 +- Orange/widgets/visualize/owsilhouetteplot.py | 9 +- .../widgets/visualize/tests/test_owboxplot.py | 15 +- .../visualize/tests/test_owdistributions.py | 18 +- .../widgets/visualize/tests/test_owheatmap.py | 28 +- .../tests/test_owlinearprojection.py | 8 +- .../visualize/tests/test_owlineplot.py | 6 +- .../tests/test_owprojectionwidget.py | 4 +- .../visualize/tests/test_owscatterplot.py | 36 +- .../visualize/tests/test_owsilhouetteplot.py | 6 +- .../visualize/tests/test_owvenndiagram.py | 3 +- Orange/widgets/visualize/utils/widget.py | 3 +- 44 files changed, 734 insertions(+), 392 deletions(-) diff --git a/Orange/classification/_tree_scorers.pyx b/Orange/classification/_tree_scorers.pyx index 00bd0639ca0..cb3bcb8172f 100644 --- a/Orange/classification/_tree_scorers.pyx +++ b/Orange/classification/_tree_scorers.pyx @@ -17,7 +17,7 @@ cdef extern from "numpy/npy_math.h": cpdef enum: NULL_BRANCH = -1 -def contingency(double[:] x, int nx, double[:] y, int ny): +def contingency(const double[:] x, int nx, const double[:] y, int ny): cdef: np.ndarray[np.uint32_t, ndim=2] cont = np.zeros((ny, nx), dtype=np.uint32) int n = len(x), yi, xi @@ -28,7 +28,8 @@ def contingency(double[:] x, int nx, double[:] y, int ny): cont[yi, xi] += 1 return cont -def find_threshold_entropy(double[:] x, double[:] y, np.intp_t[:] idx, +def find_threshold_entropy(const double[:] x, const double[:] y, + const np.intp_t[:] idx, int n_classes, int min_leaf): """ Find the threshold for continuous attribute values that maximizes @@ -89,8 +90,9 @@ def find_threshold_entropy(double[:] x, double[:] y, np.intp_t[:] idx, return (class_entro - best_entro) / N / log(2), x[idx[best_idx]] -def find_binarization_entropy(double[:, :] cont, double[:] class_distr, - double[:] val_distr, int min_leaf): +def find_binarization_entropy(const double[:, :] cont, + const double[:] class_distr, + const double[:] val_distr, int min_leaf): """ Find the split of discrete values into two groups that optimizes information gain. @@ -187,7 +189,9 @@ def find_binarization_entropy(double[:, :] cont, double[:] class_distr, return (class_entro - best_entro) / N / log(2), best_mapping -def find_threshold_MSE(double[:] x, double[:] y, np.intp_t[:] idx, int min_leaf): +def find_threshold_MSE(const double[:] x, + const double[:] y, + const np.intp_t[:] idx, int min_leaf): """ Find the threshold for continuous attribute values that minimizes MSE. @@ -232,7 +236,8 @@ def find_threshold_MSE(double[:] x, double[:] y, np.intp_t[:] idx, int min_leaf) return (best_inter - (sum * sum) / N) / N, x[idx[best_idx]] -def find_binarization_MSE(double[:] x, double[:] y, int n_values, int min_leaf): +def find_binarization_MSE(const double[:] x, + const double[:] y, int n_values, int min_leaf): """ Find the split of discrete values into two groups that minimizes the MSE. @@ -315,7 +320,9 @@ def find_binarization_MSE(double[:] x, double[:] y, int n_values, int min_leaf): return (best_inter - start_inter) / x.shape[0], best_mapping -def compute_grouped_MSE(double[:] x, double[:] y, int n_values, int min_leaf): +def compute_grouped_MSE(const double[:] x, + const double[:] y, + int n_values, int min_leaf): """ Compute the MSE decrease of the given split into groups. @@ -371,8 +378,10 @@ def compute_grouped_MSE(double[:] x, double[:] y, int n_values, int min_leaf): return (inter - sum * sum / n) / x.shape[0] -def compute_predictions(double[:, :] X, int[:] code, - double[:, :] values, double[:] thresholds): +def compute_predictions(const double[:, :] X, + const int[:] code, + const double[:, :] values, + const double[:] thresholds): """ Return the values (distributions, means and variances) stored in the nodes to which the tree classify the rows in X. @@ -419,8 +428,10 @@ def compute_predictions(double[:, :] X, int[:] code, return np.asarray(predictions) -def compute_predictions_csr(X, int[:] code, - double[:, :] values, double[:] thresholds): +def compute_predictions_csr(X, + const int[:] code, + const double[:, :] values, + const double[:] thresholds): """ Same as compute_predictions except for sparse data """ @@ -431,9 +442,9 @@ def compute_predictions_csr(X, int[:] code, double[: ,:] predictions = np.empty( (X.shape[0], values.shape[1]), dtype=np.float64) - double[:] data = X.data - np.int32_t[:] indptr = X.indptr - np.int32_t[:] indices = X.indices + const double[:] data = X.data + const np.int32_t[:] indptr = X.indptr + const np.int32_t[:] indices = X.indices int ind, attr, n_rows n_rows = X.shape[0] @@ -463,8 +474,10 @@ def compute_predictions_csr(X, int[:] code, predictions[i, j] = values[node_idx, j] return np.asarray(predictions) -def compute_predictions_csc(X, int[:] code, - double[:, :] values, double[:] thresholds): +def compute_predictions_csc(X, + const int[:] code, + const double[:, :] values, + const double[:] thresholds): """ Same as compute_predictions except for sparse data """ @@ -475,9 +488,9 @@ def compute_predictions_csc(X, int[:] code, double[: ,:] predictions = np.empty( (X.shape[0], values.shape[1]), dtype=np.float64) - double[:] data = X.data - np.int32_t[:] indptr = X.indptr - np.int32_t[:] indices = X.indices + const double[:] data = X.data + const np.int32_t[:] indptr = X.indptr + const np.int32_t[:] indices = X.indices int ind, attr, n_rows n_rows = X.shape[0] diff --git a/Orange/classification/tree.py b/Orange/classification/tree.py index fa8000ac175..764e801f53d 100644 --- a/Orange/classification/tree.py +++ b/Orange/classification/tree.py @@ -112,7 +112,7 @@ def _score_disc(): cont_entr = np.sum(cont * np.log(cont)) score = (class_entr - attr_entr + cont_entr) / n / np.log(2) score *= n / len(data) # punishment for missing values - branches = col_x + branches = col_x.copy() branches[np.isnan(branches)] = -1 if score == 0: return REJECT_ATTRIBUTE diff --git a/Orange/data/instance.py b/Orange/data/instance.py index 501ed6b6acd..53faa6a19ec 100644 --- a/Orange/data/instance.py +++ b/Orange/data/instance.py @@ -34,11 +34,12 @@ def __init__(self, domain, data=None, id=None): self._weight = 1 elif isinstance(data, Instance) and data.domain == domain: self._x = np.array(data._x) - self._y = np.array(data._y) + self._y = np.atleast_1d(np.array(data._y)) self._metas = np.array(data._metas) self._weight = data._weight else: self._x, self._y, self._metas = domain.convert(data) + self._y = np.atleast_1d(self._y) self._weight = 1 if id is not None: @@ -116,7 +117,10 @@ def __getitem__(self, key): if 0 <= idx < len(self._domain.attributes): value = self._x[idx] elif idx >= len(self._domain.attributes): - value = self._y[idx - len(self.domain.attributes)] + if self._y.ndim == 0: + value = self._y + else: + value = self._y[idx - len(self.domain.attributes)] else: value = self._metas[-1 - idx] var = self._domain[idx] diff --git a/Orange/data/table.py b/Orange/data/table.py index d34c781595f..dcccffc0487 100644 --- a/Orange/data/table.py +++ b/Orange/data/table.py @@ -1,5 +1,6 @@ import operator import os +import sys import threading import warnings import weakref @@ -96,6 +97,7 @@ def __init__(self, table, row_index): if sp.issparse(self._y): self.sparse_y = sp.csr_matrix(self._y) self._y = np.asarray(self._y.todense())[0] + self._y = np.atleast_1d(self._y) self._metas = table.metas[row_index] if sp.issparse(self._metas): self.sparse_metas = sp.csr_matrix(self._metas) @@ -117,9 +119,12 @@ def set_class(self, value): self._check_single_class() if not isinstance(value, Real): value = self.table.domain.class_var.to_val(value) - self._y[0] = value if self.sparse_y: self.table._Y[self.row_index, 0] = value + else: + self.table._Y[self.row_index] = value + if self.table._Y.ndim == 1: # if _y is not a view + self._y[0] = value def __setitem__(self, key, value): if not isinstance(key, Integral): @@ -132,17 +137,21 @@ def __setitem__(self, key, value): raise TypeError("Expected primitive value, got '%s'" % type(value).__name__) if key < len(self._x): - self._x[key] = value + # write to self.table.X to support table unlocking for live instances + self.table.X[self.row_index, key] = value if self.sparse_x is not None: - self.table.X[self.row_index, key] = value + self._x[key] = value else: - self._y[key - len(self._x)] = value if self.sparse_y is not None: self.table._Y[self.row_index, key - len(self._x)] = value + else: + self.table._Y[self.row_index] = value + if self.table._Y.ndim == 1: # if _y is not a view + self._y[0] = value else: - self._metas[-1 - key] = value - if self.sparse_metas: - self.table.metas[self.row_index, -1 - key] = value + self.table.metas[self.row_index, -1 - key] = value + if self.sparse_metas is not None: + self._metas[-1 - key] = value def _str(self, limit): def sp_values(matrix, variables): @@ -235,14 +244,15 @@ def get_subarray(self, source, row_indices, n_rows): arr = match_density(_subarray(source.metas, row_indices, [-1 - x for x in self.src_cols])) elif self.subarray_from == "Y": + Y = source._Y if source._Y.ndim == 2 else source._Y[:, None] arr = match_density(_subarray( - source._Y, row_indices, + Y, row_indices, [x - n_src_attrs for x in self.src_cols])) else: assert False if arr.dtype != self.dtype: arr = arr.astype(self.dtype) - assert arr.ndim == 2 + assert arr.ndim == 2 or self.subarray_from == "Y" and arr.ndim == 1 return arr def get_columns(self, source, row_indices, n_rows, out=None, target_indices=None): @@ -258,8 +268,13 @@ def get_columns(self, source, row_indices, n_rows, out=None, target_indices=None # converting to csc before instead of each column is faster # do not convert if not required if any(isinstance(x, int) for x in self.src_cols): - X = csc_matrix(source.X) if self.is_sparse else source.X - Y = csc_matrix(source._Y) if self.is_sparse else source._Y + X = source.X + Y = source.Y + if Y.ndim == 1: + Y = Y[:, None] + if self.is_sparse: + X = csc_matrix(X) + Y = csc_matrix(Y) if self.row_selection_needed: if row_indices is ...: @@ -347,8 +362,7 @@ class Table(Sequence, Storage): name = "untitled" domain = Domain([]) - X = _Y = metas = W = np.zeros((0, 0)) - X.setflags(write=False) + _X = _Y = _metas = _W = np.zeros((0, 0)) ids = np.zeros(0) ids.setflags(write=False) attributes = frozendict() @@ -368,61 +382,80 @@ def columns(self): _next_instance_id = 0 _next_instance_lock = Lock() + @property + def X(self): + return self._X + + @X.setter + def X(self, value): + if not self._unlocked & self._Unlocked_X: + raise ValueError("Table is readonly unless unlocked") + self._X = _dereferenced(value) + @property def Y(self): - if self._Y.shape[1] == 1: - return self._Y[:, 0] return self._Y @Y.setter def Y(self, value): if not self._unlocked & self._Unlocked_Y: raise ValueError("Table is readonly unless unlocked") - if len(value.shape) == 1: - value = value[:, None] if sp.issparse(value) and len(self) != value.shape[0]: value = value.T if sp.issparse(value): - value = value.toarray() + value = _dereferenced(value.toarray()) + if value.ndim == 2 and value.shape[1] == 1: + value = value[:, 0].copy() # no views! self._Y = value + @property + def metas(self): + return self._metas + + @metas.setter + def metas(self, value): + if not self._unlocked & self._Unlocked_metas: + raise ValueError("Table is readonly unless unlocked") + self._metas = _dereferenced(value) + + @property + def W(self): + return self._W + + @W.setter + def W(self, value): + if not self._unlocked & self._Unlocked_W: + raise ValueError("Table is readonly unless unlocked") + self._W = value def _lock_parts(self): - return ((self.X, self._Unlocked_X, "X"), + return ((self._X, self._Unlocked_X, "X"), (self._Y, self._Unlocked_Y, "Y"), - (self.metas, self._Unlocked_metas, "metas"), - (self.W, self._Unlocked_W, "weights")) + (self._metas, self._Unlocked_metas, "metas"), + (self._W, self._Unlocked_W, "weights")) def _update_locks(self): + def sync(*xs): + for x in xs: + x.flags.writeable = writeable + for part, flag, _ in self._lock_parts(): if part is None: continue writeable = bool(self._unlocked & flag) - if sp.issparse(part): - if sp.isspmatrix_csr(part) or sp.isspmatrix_csc(part): - for a in (part.data, part.indices, part.indptr): - a.flags.writeable = writeable - elif sp.ismatrix_coo(part): - for a in (part.data, part.row, part.col): - a.flags.writeable = writeable - else: - raise ValueError("Unsupported sparse data type") + if sp.isspmatrix_csr(part) or sp.isspmatrix_csc(part): + sync(part.data, part.indices, part.indptr) + elif sp.isspmatrix_coo(part): + sync(part.data, part.row, part.col) + elif sp.issparse(part): + raise ValueError("Unsupported sparse data type") else: - part.flags.writeable = writeable + sync(part) - @contextmanager - def unlocked(self, *parts): + def __unlocked(self, *parts): prev_state = self._unlocked for part, flag, name in self._lock_parts(): - if (not parts or any(ppart is part - # self.Y can be a view of self._Y - or ppart.base is part is self._Y - for ppart in parts)) \ - and not flag & self._unlocked: - if part is not None \ - and not (part.flags.writeable or part.flags.owndata): - raise ValueError(f"'{name}' is a view into another table " - "and cannot be unlocked") + if not parts or any(ppart is part for ppart in parts): self._unlocked |= flag try: self._update_locks() @@ -431,6 +464,26 @@ def unlocked(self, *parts): self._unlocked = prev_state self._update_locks() + def unlocked(self, *parts): + """ + Unlock the given parts (default: all parts) of the table. + + The caller must ensure that the table is safe to modify. The function + will raise an exception if the table contains view to other table. + """ + def can_unlock(x): + if sp.issparse(x): + return can_unlock(x.data) + return x.flags.writeable or x.flags.owndata + + for part, flag, name in self._lock_parts(): + if not flag & self._unlocked \ + and (not parts or any(ppart is part for ppart in parts)) \ + and part is not None and not can_unlock(part): + raise ValueError(f"'{name}' is a view into another table " + "and cannot be unlocked") + return contextmanager(self.__unlocked)(*parts) + def __new__(cls, *args, **kwargs): def warn_deprecated(method): warnings.warn("Direct calls to Table's constructor are deprecated " @@ -455,7 +508,8 @@ def warn_deprecated(method): elif isinstance(args[0], Table): if len(args) > 1: raise TypeError("Table(table: Table) expects just one argument") - return cls.from_table(args[0].domain, args[0], **kwargs) + return cls.from_table(args[0].domain, args[0], + copy=kwargs.pop("copy", True), **kwargs) elif isinstance(args[0], Domain): domain, args = args[0], args[1:] if not args: @@ -500,7 +554,10 @@ def from_domain(cls, domain, n_rows=0, weights=False): self.n_rows = n_rows with self.unlocked(): self.X = np.zeros((n_rows, len(domain.attributes))) - self.Y = np.zeros((n_rows, len(domain.class_vars))) + if len(domain.class_vars) != 1: + self.Y = np.zeros((n_rows, len(domain.class_vars))) + else: + self.Y = np.zeros(n_rows) if weights: self.W = np.ones(n_rows) else: @@ -511,7 +568,7 @@ def from_domain(cls, domain, n_rows=0, weights=False): return self @classmethod - def from_table(cls, domain, source, row_indices=...): + def from_table(cls, domain, source, row_indices=..., *, copy=False): """ Create a new table from selected columns and/or rows of an existing one. The columns are chosen using a domain. The domain may also include @@ -526,6 +583,8 @@ def from_table(cls, domain, source, row_indices=...): :type source: Orange.data.Table :param row_indices: indices of the rows to include :type row_indices: a slice or a sequence + :param copy: if True, copy all tables (default: False, create views) + :type copy: bool :return: a new table :rtype: Orange.data.Table """ @@ -542,7 +601,7 @@ def from_table(cls, domain, source, row_indices=...): if cached is not None: return cached if domain is source.domain: - table = cls.from_table_rows(source, row_indices) + table = cls.from_table_rows(source, row_indices, copy=copy) # assure resulting domain is the instance passed on input table.domain = domain # since sparse flags are not considered when checking for @@ -638,13 +697,15 @@ def from_table(cls, domain, source, row_indices=...): cls._init_ids(self) self.attributes = getattr(source, 'attributes', {}) _idcache_save(_thread_local.conversion_cache, (domain, source), self) + if copy: + self.ensure_copy() return self finally: if new_cache: _thread_local.conversion_cache = None _thread_local.domain_cache = None - def transform(self, domain): + def transform(self, domain, copy=False): """ Construct a table with a different domain. @@ -667,10 +728,10 @@ def transform(self, domain): Returns: A new table """ - return type(self).from_table(domain, self) + return type(self).from_table(domain, self, copy=copy) @classmethod - def from_table_rows(cls, source, row_indices): + def from_table_rows(cls, source, row_indices, *, copy=False): """ Construct a new table by selecting rows from the source table. @@ -678,12 +739,14 @@ def from_table_rows(cls, source, row_indices): :type source: Orange.data.Table :param row_indices: indices of the rows to include :type row_indices: a slice or a sequence + :param copy: if True, copy all tables (default: False, create views) + :type copy: bool :return: a new table :rtype: Orange.data.Table """ def get_rows(a): a = a[row_indices] - if isinstance(row_indices, slice) or row_indices is ...: + if isinstance(row_indices, slice) or row_indices is ... or copy: a = a.copy() return a @@ -693,7 +756,7 @@ def get_rows(a): self.X = get_rows(source.X) if self.X.ndim == 1: self.X = self.X.reshape(-1, len(self.domain.attributes)) - self.Y = get_rows(source._Y) + self.Y = get_rows(source.Y) self.metas = get_rows(source.metas) if self.metas.ndim == 1: self.metas = self.metas.reshape(-1, len(self.domain.metas)) @@ -728,17 +791,19 @@ def from_numpy(cls, domain, X, Y=None, metas=None, W=None, metas, = _check_arrays(metas, dtype=object, shape_1=X.shape[0]) ids, = _check_arrays(ids, dtype=int, shape_1=X.shape[0]) - if Y is not None and Y.ndim == 1: - Y = Y.reshape(Y.shape[0], 1) if domain is None: domain = Domain.from_numpy(X, Y, metas) if Y is None: - if sp.issparse(X): + if not domain.class_vars or sp.issparse(X): Y = np.empty((X.shape[0], 0), dtype=np.float64) else: + own_data = X.flags.owndata and X.base is None Y = X[:, len(domain.attributes):] X = X[:, :len(domain.attributes)] + if own_data: + Y = Y.copy() + X = X.copy() if metas is None: metas = np.empty((X.shape[0], 0), object) if W is None or W.size == 0: @@ -751,7 +816,12 @@ def from_numpy(cls, domain, X, Y=None, metas=None, W=None, "Invalid number of variable columns ({} != {})".format( X.shape[1], len(domain.attributes)) ) - if Y.shape[1] != len(domain.class_vars): + if Y.ndim == 1: + if not domain.class_var: + raise ValueError( + "Invalid number of class columns " + f"(1 != {len(domain.class_vars)})") + elif Y.shape[1] != len(domain.class_vars): raise ValueError( "Invalid number of class columns ({} != {})".format( Y.shape[1], len(domain.class_vars)) @@ -785,19 +855,20 @@ def from_list(cls, domain, rows, weights=None): if weights is not None and len(rows) != len(weights): raise ValueError("mismatching number of instances and weights") self = cls.from_domain(domain, len(rows), weights is not None) - attrs, classes = domain.attributes, domain.class_vars - metas = domain.metas - nattrs, ncls = len(domain.attributes), len(domain.class_vars) + all_vars = domain.variables + domain.metas + nattrs = len(domain.attributes) + nattrscls = len(domain.variables) with self.unlocked(): for i, row in enumerate(rows): if isinstance(row, Instance): row = row.list - for j, (var, val) in enumerate(zip(attrs, row)): - self.X[i, j] = var.to_val(val) - for j, (var, val) in enumerate(zip(classes, row[nattrs:])): - self._Y[i, j] = var.to_val(val) - for j, (var, val) in enumerate(zip(metas, row[nattrs + ncls:])): - self.metas[i, j] = var.to_val(val) + vals = [var.to_val(val) for var, val in zip(all_vars, row)] + self.X[i] = vals[:nattrs] + if self._Y.ndim == 1: + self._Y[i] = vals[nattrs] if nattrs < len(vals) else np.nan + else: + self._Y[i] = vals[nattrs:nattrscls] + self.metas[i] = vals[nattrscls:] if weights is not None: self.W = np.array(weights) self.attributes = {} @@ -924,12 +995,11 @@ def _set_row(self, example, row): domain = self.domain if isinstance(example, Instance): if example.domain == domain: - if isinstance(example, RowInstance): - self.X[row] = example._x - self._Y[row] = example._y + self.X[row] = example._x + if self._Y.ndim == 1: + self._Y[row] = float(example._y) else: - self.X[row] = example._x - self._Y[row] = example._y + self._Y[row] = np.atleast_1d(example._y) self.metas[row] = example._metas return @@ -943,12 +1013,16 @@ def _set_row(self, example, row): type(self)._next_instance_id += 1 else: - self.X[row] = [var.to_val(val) - for var, val in zip(domain.attributes, example)] - self._Y[row] = [var.to_val(val) - for var, val in - zip(domain.class_vars, - example[len(domain.attributes):])] + attrs = domain.attributes + if len(example) != len(domain.variables): + raise ValueError("invalid length") + self._X[row] = [var.to_val(val) for var, val in zip(attrs, example)] + if self._Y.ndim == 1: + self._Y[row] = domain.class_var.to_val(example[len(attrs)]) + else: + self._Y[row] = [var.to_val(val) + for var, val in zip(domain.class_vars, + example[len(attrs):])] self.metas[row] = np.array([var.Unknown for var in domain.metas], dtype=object) @@ -973,6 +1047,8 @@ def __getitem__(self, key): var = self.domain[col_idx] if 0 <= col_idx < len(self.domain.attributes): val = self.X[row_idx, col_idx] + elif col_idx == len(self.domain.attributes) and self._Y.ndim == 1: + val = self._Y[row_idx] elif col_idx >= len(self.domain.attributes): val = self._Y[row_idx, col_idx - len(self.domain.attributes)] @@ -1043,6 +1119,8 @@ def __setitem__(self, key, value): if col_idx >= 0: if col_idx < self.X.shape[1]: self.X[row_idx, col_idx] = val + elif self._Y.ndim == 1 and col_idx == self.X.shape[1]: + self._Y[row_idx] = val else: self._Y[row_idx, col_idx - self.X.shape[1]] = val else: @@ -1057,12 +1135,16 @@ def __setitem__(self, key, value): if not attributes: attributes = self.domain.attributes for var, col in zip(attributes, col_indices): + val = var.to_val(value) if 0 <= col < n_attrs: - self.X[row_idx, col] = var.to_val(value) + self.X[row_idx, col] = val elif col >= n_attrs: - self._Y[row_idx, col - n_attrs] = var.to_val(value) + if self._Y.ndim == 1 and col == n_attrs: + self._Y[row_idx] = val + else: + self._Y[row_idx, col - n_attrs] = val else: - self.metas[row_idx, -1 - col] = var.to_val(value) + self.metas[row_idx, -1 - col] = val else: attr_cols = np.fromiter( (col for col in col_indices if 0 <= col < n_attrs), int) @@ -1080,7 +1162,10 @@ def __setitem__(self, key, value): if len(attr_cols): self.X[row_idx, attr_cols] = value if len(class_cols): - self._Y[row_idx, class_cols] = value + if self._Y.ndim == 1 and np.all(class_cols == 0): + self._Y[row_idx] = value + else: + self._Y[row_idx, class_cols] = value if len(meta_cols): self.metas[row_idx, meta_cols] = value @@ -1241,25 +1326,25 @@ def is_view(self): """ Return `True` if all arrays represent a view referring to another table """ - return ((not self.X.shape[-1] or self.X.base is not None) and + return ((not self._X.shape[-1] or self._X.base is not None) and (not self._Y.shape[-1] or self._Y.base is not None) and - (not self.metas.shape[-1] or self.metas.base is not None) and - (not self._weights.shape[-1] or self.W.base is not None)) + (not self._metas.shape[-1] or self._metas.base is not None) and + (not self._weights.shape[-1] or self._W.base is not None)) def is_copy(self): """ Return `True` if the table owns its data """ - return ((not self.X.shape[-1] or self.X.base is None) and + return ((not self._X.shape[-1] or self._X.base is None) and (self._Y.base is None) and - (self.metas.base is None) and - (self.W.base is None)) + (self._metas.base is None) and + (self._W.base is None)) def is_sparse(self): """ Return `True` if the table stores data in sparse format """ - return any(sp.issparse(i) for i in [self.X, self.Y, self.metas]) + return any(sp.issparse(i) for i in [self._X, self._Y, self._metas]) def ensure_copy(self): """ @@ -1271,15 +1356,14 @@ def is_view(x): # them creates copies in constructor we can skip this check here. return not sp.issparse(x) and x.base is not None - with self.unlocked(): - if is_view(self.X): - self.X = self.X.copy() - if is_view(self._Y): - self._Y = self._Y.copy() - if is_view(self.metas): - self.metas = self.metas.copy() - if is_view(self.W): - self.W = self.W.copy() + if is_view(self._X): + self._X = self._X.copy() + if is_view(self._Y): + self._Y = self._Y.copy() + if is_view(self._metas): + self._metas = self._metas.copy() + if is_view(self._W): + self._W = self._W.copy() def copy(self): """ @@ -1362,11 +1446,11 @@ def checksum(self, include_metas=True): # (after pickling and unpickling such arrays, checksum changes) # Why, and should we fix it or remove it? """Return a checksum over X, Y, metas and W.""" - cs = zlib.adler32(np.ascontiguousarray(self.X)) + cs = zlib.adler32(np.ascontiguousarray(self._X)) cs = zlib.adler32(np.ascontiguousarray(self._Y), cs) if include_metas: - cs = zlib.adler32(np.ascontiguousarray(self.metas), cs) - cs = zlib.adler32(np.ascontiguousarray(self.W), cs) + cs = zlib.adler32(np.ascontiguousarray(self._metas), cs) + cs = zlib.adler32(np.ascontiguousarray(self._W), cs) return cs def shuffle(self): @@ -1404,6 +1488,8 @@ def rx(M): if col_index >= 0: if col_index < self.X.shape[1]: col = rx(self.X[:, col_index]) + elif self._Y.ndim == 1 and col_index == self._X.shape[1]: + col = rx(self._Y) else: col = rx(self._Y[:, col_index - self.X.shape[1]]) else: @@ -1428,7 +1514,10 @@ def _sp_anynan(a): if sp.issparse(self._Y): remove += _sp_anynan(self._Y) else: - remove += bn.anynan(self._Y, axis=1) + if self._Y.ndim == 1: + remove += np.isnan(self._Y) + else: + remove += bn.anynan(self._Y, axis=1) if sp.issparse(self.metas): remove += _sp_anynan(self._metas) else: @@ -1460,7 +1549,10 @@ def _filter_has_class(self, negate=False): retain = (self._Y.indptr[1:] == self._Y.indptr[-1:] + self._Y.shape[1]) else: - retain = bn.anynan(self._Y, axis=1) + if self._Y.ndim == 1: + retain = np.isnan(self._Y) + else: + retain = bn.anynan(self._Y, axis=1) if not negate: retain = np.logical_not(retain) return self.from_table_rows(self, retain) @@ -1694,12 +1786,12 @@ def _compute_basic_stats(self, columns=None, if compute_variance: raise NotImplementedError("computation of variance is " "not implemented yet") - W = self.W if self.has_weights() else None + W = self._W if self.has_weights() else None rr = [] stats = [] if not columns: if self.domain.attributes: - rr.append(fast_stats(self.X, W)) + rr.append(fast_stats(self._X, W)) if self.domain.class_vars: rr.append(fast_stats(self._Y, W)) if include_metas and self.domain.metas: @@ -1711,11 +1803,14 @@ def _compute_basic_stats(self, columns=None, for column in columns: c = self.domain.index(column) if 0 <= c < nattrs: - S = fast_stats(self.X[:, [c]], W and W[:, [c]]) + S = fast_stats(self._X[:, [c]], W and W[:, [c]]) elif c >= nattrs: - S = fast_stats(self._Y[:, [c - nattrs]], W and W[:, [c - nattrs]]) + if self._Y.ndim == 1 and c == nattrs: + S = fast_stats(self._Y[:, None], W and W[:, None]) + else: + S = fast_stats(self._Y[:, [c - nattrs]], W and W[:, [c - nattrs]]) else: - S = fast_stats(self.metas[:, [-1 - c]], W and W[:, [-1 - c]]) + S = fast_stats(self._metas[:, [-1 - c]], W and W[:, [-1 - c]]) stats.append(S[0]) return stats @@ -1743,6 +1838,8 @@ def _compute_distributions(self, columns=None): x = self.metas[:, col * (-1) - 1] if np.issubdtype(x.dtype, np.dtype(object)): x = x.astype(float) + elif self._Y.ndim == 1 and col == X.shape[1]: + x = self._Y else: x = self._Y[:, col - X.shape[1]] @@ -1803,6 +1900,8 @@ def _compute_contingency(self, col_vars=None, row_var=None): row_data = self.X[:, row_indi] elif row_indi < 0: row_data = self.metas[:, -1 - row_indi] + elif self._Y.ndim == 1 and row_indi == n_atts: + row_data = self._Y else: row_data = self._Y[:, row_indi - n_atts] @@ -1848,8 +1947,9 @@ def _compute_contingency(self, col_vars=None, row_var=None): nans_rows[arr_i], nans[arr_i]) else: for col_i, arr_i, var in disc_vars: + col = arr if arr.ndim == 1 else arr[:, arr_i] contingencies[col_i] = contingency( - arr[:, arr_i].astype(float), + col.astype(float), row_data, len(var.values) - 1, n_rows - 1, W) cont_vars = [v for v in vars if v[2].is_continuous] @@ -2054,6 +2154,19 @@ def densify(features): return t +def _dereferenced(array): + # CSR and CSC matrices are constructed so that array.data is a + # view to a base, which prevents unlocking them. Therefore, if + # sparse matrix doesn't own its data, but its base array is + # referenced only by this matrix, we copy it. This doesn't + # increase memory use, but allows unlocking. + if sp.issparse(array) \ + and array.data.base is not None \ + and sys.getrefcount(array.data.base) == 2: # 2 = 1 real + 1 for arg + array.data = array.data.copy() + return array + + def _check_arrays(*arrays, dtype=None, shape_1=None): checked = [] if not len(arrays): @@ -2081,6 +2194,7 @@ def ninstances(array): if not (sp.isspmatrix_csr(array) or sp.isspmatrix_csc(array)): array = array.tocsr() array.data = np.asarray(array.data) + array = _dereferenced(array) has_inf = _check_inf(array.data) else: if dtype is not None: @@ -2104,6 +2218,8 @@ def _check_inf(array): def _subarray(arr, rows, cols): rows = _optimize_indices(rows, arr.shape[0]) + if arr.ndim == 1: + return arr[rows] cols = _optimize_indices(cols, arr.shape[1]) return arr[_rxc_ix(rows, cols)] diff --git a/Orange/evaluation/testing.py b/Orange/evaluation/testing.py index 400bef53047..8d37dafa232 100644 --- a/Orange/evaluation/testing.py +++ b/Orange/evaluation/testing.py @@ -311,7 +311,8 @@ def get_augmented_data(self, model_names, attrs = data.domain.attributes if include_attrs else [] domain = Domain(attrs, data.domain.class_vars, metas=new_meta_attr) predictions = data.transform(domain) - predictions.metas = new_meta_vals + with predictions.unlocked(predictions.metas): + predictions.metas = new_meta_vals predictions.name = data.name return predictions diff --git a/Orange/preprocess/_relieff.pyx b/Orange/preprocess/_relieff.pyx index 67b2c7a7712..11da65af362 100644 --- a/Orange/preprocess/_relieff.pyx +++ b/Orange/preprocess/_relieff.pyx @@ -362,7 +362,12 @@ cdef tuple prepare(X, y, is_discrete, contingencies): row_ptp[row_ptp == 0] = np.inf # Avoid zero-division X[:, is_continuous] -= row_min[is_continuous] X[:, is_continuous] /= row_ptp[is_continuous] - y = np.array(y, dtype=np.float64) + if y.ndim > 1: + if y.shape[1] > 1: + raise ValueError("ReliefF expects a single class") + y = np.array(y[:, 0], dtype=np.float64) + else: + y = np.array(y, dtype=np.float64) is_defined = np.logical_not(np.isnan(y)) X = X[is_defined] y = y[is_defined] diff --git a/Orange/preprocess/preprocess.py b/Orange/preprocess/preprocess.py index 1334fbc8c0e..aa4724801a9 100644 --- a/Orange/preprocess/preprocess.py +++ b/Orange/preprocess/preprocess.py @@ -170,8 +170,9 @@ def __call__(self, data): assert X.shape[1] == len(features) domain = Orange.data.Domain(features, data.domain.class_vars, data.domain.metas) - new_data = data.transform(domain) - new_data.X = X + new_data = data.transform(domain, copy=True) + with new_data.unlocked(new_data.X): + new_data.X = X return new_data @@ -414,12 +415,13 @@ def __call__(self, data): rstate = np.random.RandomState(self.rand_seed) # ensure the same seed is not used to shuffle X and Y at the same time r1, r2, r3 = rstate.randint(0, 2 ** 32 - 1, size=3, dtype=np.int64) - if self.rand_type & Randomize.RandomizeClasses: - new_data.Y = self.randomize(new_data.Y, r1) - if self.rand_type & Randomize.RandomizeAttributes: - new_data.X = self.randomize(new_data.X, r2) - if self.rand_type & Randomize.RandomizeMetas: - new_data.metas = self.randomize(new_data.metas, r3) + with new_data.unlocked(): + if self.rand_type & Randomize.RandomizeClasses: + new_data.Y = self.randomize(new_data.Y, r1) + if self.rand_type & Randomize.RandomizeAttributes: + new_data.X = self.randomize(new_data.X, r2) + if self.rand_type & Randomize.RandomizeMetas: + new_data.metas = self.randomize(new_data.metas, r3) return new_data @staticmethod diff --git a/Orange/preprocess/transformation.py b/Orange/preprocess/transformation.py index 01ea719b5ce..43dcb4bf79a 100644 --- a/Orange/preprocess/transformation.py +++ b/Orange/preprocess/transformation.py @@ -36,7 +36,7 @@ def __call__(self, data): col = data.X else: col = data.metas - if not sp.issparse(col): + if not sp.issparse(col) and col.ndim > 1: col = col.squeeze(axis=1) transformed = self.transform(col) if inst: diff --git a/Orange/statistics/util.py b/Orange/statistics/util.py index a8080b6a3e0..87af6833f7a 100644 --- a/Orange/statistics/util.py +++ b/Orange/statistics/util.py @@ -355,7 +355,8 @@ def weighted_mean(): np.nanmin(X, axis=0), np.nanmax(X, axis=0), np.nanmean(X, axis=0) if not weighted else weighted_mean(), - np.nanvar(X, axis=0) if compute_variance else np.zeros(X.shape[1]), + np.nanvar(X, axis=0) if compute_variance else \ + np.zeros(X.shape[1] if X.ndim == 2 else 1), nans, X.shape[0] - nans)) elif is_sparse and X.size: diff --git a/Orange/tests/test_classification.py b/Orange/tests/test_classification.py index 95081652ac0..e2355046602 100644 --- a/Orange/tests/test_classification.py +++ b/Orange/tests/test_classification.py @@ -324,7 +324,8 @@ def test_multinomial(self): def test_nan_columns(self): data = Orange.data.Table("iris") - data.X[:, (1, 3)] = np.NaN + with data.unlocked(): + data.X[:, (1, 3)] = np.NaN lr = LogisticRegressionLearner() cv = CrossValidation(k=2, store_models=True) res = cv(data, [lr]) @@ -364,7 +365,7 @@ class UnknownValuesInPrediction(unittest.TestCase): def test_unknown(self): table = Table("iris") tree = LogisticRegressionLearner()(table) - tree([1, 2, None]) + tree([1, 2, None, 4]) def test_missing_class(self): table = Table(test_filename("datasets/adult_sample_missing")) diff --git a/Orange/tests/test_clustering_dbscan.py b/Orange/tests/test_clustering_dbscan.py index 3286f5a714d..714ca000838 100644 --- a/Orange/tests/test_clustering_dbscan.py +++ b/Orange/tests/test_clustering_dbscan.py @@ -42,13 +42,15 @@ def test_predict_numpy(self): self.assertEqual(len(self.iris), len(model.labels)) def test_predict_sparse_csc(self): - self.iris.X = csc_matrix(self.iris.X[::20]) + with self.iris.unlocked(): + self.iris.X = csc_matrix(self.iris.X[::20]) c = self.dbscan(self.iris) self.assertEqual(np.ndarray, type(c)) self.assertEqual(len(self.iris), len(c)) def test_predict_spares_csr(self): - self.iris.X = csr_matrix(self.iris.X[::20]) + with self.iris.unlocked(): + self.iris.X = csr_matrix(self.iris.X[::20]) c = self.dbscan(self.iris) self.assertEqual(np.ndarray, type(c)) self.assertEqual(len(self.iris), len(c)) diff --git a/Orange/tests/test_clustering_kmeans.py b/Orange/tests/test_clustering_kmeans.py index 7ff40d94992..1ab9043e964 100644 --- a/Orange/tests/test_clustering_kmeans.py +++ b/Orange/tests/test_clustering_kmeans.py @@ -44,13 +44,15 @@ def test_predict_numpy(self): self.assertEqual(len(self.iris), len(c.labels)) def test_predict_sparse_csc(self): - self.iris.X = csc_matrix(self.iris.X[::20]) + with self.iris.unlocked(): + self.iris.X = csc_matrix(self.iris.X[::20]) c = self.kmeans(self.iris) self.assertEqual(np.ndarray, type(c)) self.assertEqual(len(self.iris), len(c)) def test_predict_spares_csr(self): - self.iris.X = csr_matrix(self.iris.X[::20]) + with self.iris.unlocked(): + self.iris.X = csr_matrix(self.iris.X[::20]) c = self.kmeans(self.iris) self.assertEqual(np.ndarray, type(c)) self.assertEqual(len(self.iris), len(c)) diff --git a/Orange/tests/test_clustering_louvain.py b/Orange/tests/test_clustering_louvain.py index a65ba4a8edf..7c6f3dd6b1c 100644 --- a/Orange/tests/test_clustering_louvain.py +++ b/Orange/tests/test_clustering_louvain.py @@ -44,13 +44,15 @@ def test_predict_numpy(self): self.assertEqual(len(self.iris), len(c.labels)) def test_predict_sparse_csc(self): - self.iris.X = csc_matrix(self.iris.X[::5]) + with self.iris.unlocked(): + self.iris.X = csc_matrix(self.iris.X[::5]) c = self.louvain(self.iris) self.assertEqual(np.ndarray, type(c)) self.assertEqual(len(self.iris), len(c)) - def test_predict_spares_csr(self): - self.iris.X = csr_matrix(self.iris.X[::5]) + def test_predict_sparse_csr(self): + with self.iris.unlocked(): + self.iris.X = csr_matrix(self.iris.X[::5]) c = self.louvain(self.iris) self.assertEqual(np.ndarray, type(c)) self.assertEqual(len(self.iris), len(c)) diff --git a/Orange/tests/test_contingency.py b/Orange/tests/test_contingency.py index f866de16899..2435d31900f 100644 --- a/Orange/tests/test_contingency.py +++ b/Orange/tests/test_contingency.py @@ -48,8 +48,9 @@ def test_discrete(self): def test_discrete_missing(self): d = data.Table("zoo") - d.Y[25] = float("nan") - d[0][0] = float("nan") + with d.unlocked(): + d.Y[25] = float("nan") + d[0][0] = float("nan") cont = contingency.Discrete(d, 0) assert_dist_equal(cont["amphibian"], [3, 0]) assert_dist_equal(cont, [[3, 0], [20, 0], [13, 0], [4, 4], @@ -60,8 +61,9 @@ def test_discrete_missing(self): [1, 0]) d = data.Table("zoo") - d.Y[2] = float("nan") - d[2]["predator"] = float("nan") + with d.unlocked(): + d.Y[2] = float("nan") + d[2]["predator"] = float("nan") cont = contingency.Discrete(d, "predator") assert_dist_equal(cont["fish"], [4, 8]) assert_dist_equal(cont, [[1, 3], [11, 9], [4, 8], [7, 1], @@ -73,10 +75,11 @@ def test_discrete_missing(self): def test_array_with_unknowns(self): d = data.Table("zoo") - d.Y[2] = float("nan") - d.Y[6] = float("nan") - d[2]["predator"] = float("nan") - d[4]["predator"] = float("nan") + with d.unlocked(): + d.Y[2] = float("nan") + d.Y[6] = float("nan") + d[2]["predator"] = float("nan") + d[4]["predator"] = float("nan") cont = contingency.Discrete(d, "predator") assert_dist_equal(cont.array_with_unknowns, [[1, 3, 0], [11, 9, 0], [4, 8, 0], [7, 1, 0], @@ -84,10 +87,11 @@ def test_array_with_unknowns(self): def test_discrete_with_fallback(self): d = data.Table("zoo") - d.Y[25] = None - d.Y[24] = None - d.X[0, 0] = None - d.X[24, 0] = None + with d.unlocked(): + d.Y[25] = None + d.Y[24] = None + d.X[0, 0] = None + d.X[24, 0] = None default = contingency.Discrete(d, 0) d._compute_contingency = Mock(side_effect=NotImplementedError) @@ -123,7 +127,8 @@ def test_continuous(self): def test_continuous_missing(self): d = data.Table("iris") - d[1][1] = float("nan") + with d.unlocked(): + d[1][1] = float("nan") cont = contingency.Continuous(d, "sepal width") correct = [[2.3, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0, 4.1, 4.2, 4.4], @@ -133,7 +138,8 @@ def test_continuous_missing(self): np.testing.assert_almost_equal(cont["Iris-setosa"], correct) self.assertEqual(cont.unknowns, 0) - d.Y[0] = float("nan") + with d.unlocked(): + d.Y[0] = float("nan") cont = contingency.Continuous(d, "sepal width") correct = [[2.2, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.6, 3.8], [1, 4, 2, 4, 8, 2, 12, 4, 5, 3, 2, 1, 2]] @@ -146,7 +152,8 @@ def test_continuous_missing(self): 0., 0., 0., 0., 0., 0., 0.]) self.assertEqual(cont.unknowns, 0) - d.Y[1] = float("nan") + with d.unlocked(): + d.Y[1] = float("nan") cont = contingency.Continuous(d, "sepal width") np.testing.assert_almost_equal(cont.col_unknowns, [0, 0, 0]) np.testing.assert_almost_equal( @@ -156,7 +163,8 @@ def test_continuous_missing(self): self.assertEqual(cont.unknowns, 1) # this one was failing before since the issue in _contingecy.pyx - d.Y[:50] = np.zeros(50) * float("nan") + with d.unlocked(): + d.Y[:50] = np.zeros(50) * float("nan") cont = contingency.Continuous(d, "sepal width") np.testing.assert_almost_equal(cont.col_unknowns, [0, 0, 0]) np.testing.assert_almost_equal( @@ -171,7 +179,8 @@ def test_continuous_array_with_unknowns(): Test array_with_unknowns function """ d = data.Table("iris") - d.Y[:50] = np.zeros(50) * float("nan") + with d.unlocked(): + d.Y[:50] = np.zeros(50) * float("nan") cont = contingency.Continuous(d, "sepal width") correct_row_unknowns = [0., 0., 1., 0., 0., 0., 0., 0., 1., 6., 5., 5., 2., 9., 6., 2., 3., 4., 2., 1., 1., 1., 1.] @@ -200,8 +209,9 @@ def test_mixedtype_metas(self): cont = contingency.get_contingency(zoo, 2, t.domain.metas[1]) assert_dist_equal(cont["1"], [38, 5]) assert_dist_equal(cont, [[4, 54], [38, 5]]) - zoo[25][t.domain.metas[1]] = float("nan") - zoo[0][2] = float("nan") + with zoo.unlocked(): + zoo[25][t.domain.metas[1]] = float("nan") + zoo[0][2] = float("nan") cont = contingency.get_contingency(zoo, 2, t.domain.metas[1]) assert_dist_equal(cont["1"], [37, 5]) assert_dist_equal(cont, [[4, 53], [37, 5]]) @@ -235,6 +245,7 @@ def _construct_sparse(): 2, 5, 6, 13] indptr = [0, 11, 20, 23, 23, 27] X = sp.csr_matrix((sdata, indices, indptr), shape=(5, 20)) + X.data = X.data.copy() # make it the owner of it's data Y = np.array([[1, 2, 1, 0, 0]]).T return data.Table.from_numpy(domain, X, Y) @@ -255,7 +266,8 @@ def test_sparse(self): assert_dist_equal(cont["b"], [[1], [1]]) assert_dist_equal(cont[2], [[], []]) - d[4].set_class(1) + with d.unlocked(): + d[4].set_class(1) cont = contingency.Continuous(d, 13) assert_dist_equal(cont[0], [[], []]) assert_dist_equal(cont["b"], [[1, 1.1], [1, 1]]) @@ -333,9 +345,10 @@ def test_compute_contingency_row_attribute_sparse(self): Testing with sparse row variable since currently we do not test the situation when a row variable is sparse. """ - d = self.test9 # make X sparse - d.X = csr_matrix(d.X) + d = self.test9.copy() + with d.unlocked(): + d.X = csr_matrix(d.X) var1, var2 = d.domain[0], d.domain[1] cont = contingency.Discrete(d, var1, var2) assert_dist_equal(cont, [[1, 0], [1, 0], [1, 0], [1, 0], @@ -344,7 +357,9 @@ def test_compute_contingency_row_attribute_sparse(self): assert_dist_equal(cont, [[1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 1, 1, 1, 1]]) - d.X = csc_matrix(d.X) + d = self.test9.copy() + with d.unlocked(): + d.X = csc_matrix(d.X) cont = contingency.Discrete(d, var1, var2) assert_dist_equal(cont, [[1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 1], [0, 1], [0, 1]]) @@ -365,7 +380,8 @@ def test_compute_contingency_invalid(self): c = contingency.get_contingency(d, X, C) self.assertEqual(c.counts.shape[0], 1024) - d.Y[5] = 1024 + with d.unlocked(): + d.Y[5] = 1024 with self.assertRaises(IndexError): contingency.get_contingency(d, X, C) diff --git a/Orange/tests/test_discretize.py b/Orange/tests/test_discretize.py index 119ac8c86dd..480cf179e6c 100644 --- a/Orange/tests/test_discretize.py +++ b/Orange/tests/test_discretize.py @@ -229,7 +229,8 @@ def test_transform(self): def test_remove_constant(self): table = data.Table('iris') - table[:, 0] = 1 + with table.unlocked(): + table[:, 0] = 1 discretize = Discretize(remove_const=True) new_table = discretize(table) self.assertNotEqual(len(table.domain.attributes), @@ -237,7 +238,8 @@ def test_remove_constant(self): def test_keep_constant(self): table = data.Table('iris') - table[:, 0] = 1 + with table.unlocked(): + table[:, 0] = 1 discretize = Discretize(remove_const=False) new_table = discretize(table) self.assertEqual(len(table.domain.attributes), diff --git a/Orange/tests/test_distribution.py b/Orange/tests/test_distribution.py index 1242999dc37..f0be73bf636 100644 --- a/Orange/tests/test_distribution.py +++ b/Orange/tests/test_distribution.py @@ -99,8 +99,9 @@ def test_fallback(self): def test_fallback_with_weights_and_nan(self): d = data.Table("zoo") - d.set_weights(np.random.uniform(0., 1., size=len(d))) - d.Y[::10] = np.nan + with d.unlocked(): + d.set_weights(np.random.uniform(0., 1., size=len(d))) + d.Y[::10] = np.nan default = distribution.Discrete(d, "type") d._compute_distributions = Mock(side_effect=NotImplementedError) @@ -206,7 +207,8 @@ def test_min_max(self): def test_array_with_unknowns(self): d = data.Table("zoo") - d.Y[0] = np.nan + with d.unlocked(): + d.Y[0] = np.nan disc = distribution.Discrete(d, "type") self.assertIsInstance(disc, np.ndarray) self.assertEqual(disc.unknowns, 1) @@ -473,7 +475,8 @@ def assert_dist_and_unknowns(computed, goal_dist): assert_dist_and_unknowns(ddist[18], [[0, 2], [4, 1]]) assert_dist_and_unknowns(ddist[19], zeros) - d.set_weights(np.array([1, 2, 3, 4, 5])) + with d.unlocked(): + d.set_weights(np.array([1, 2, 3, 4, 5])) ddist = distribution.get_distributions(d) self.assertEqual(len(ddist), 20) @@ -508,7 +511,9 @@ def test_compute_distributions_metas(self): # repeat with nan values assert d.metas.dtype.kind == "O" assert d.metas[0, 1] == 0 - d.metas[0, 1] = np.nan + + with d.unlocked(): + d.metas[0, 1] = np.nan dist, nanc = d._compute_distributions([variable])[0] assert_dist_equal(dist, [2, 3, 2]) self.assertEqual(nanc, 1) diff --git a/Orange/tests/test_evaluation_scoring.py b/Orange/tests/test_evaluation_scoring.py index bd89504a747..0fe8950c793 100644 --- a/Orange/tests/test_evaluation_scoring.py +++ b/Orange/tests/test_evaluation_scoring.py @@ -242,7 +242,7 @@ def test_call(self): def test_bayes(self): x = np.random.randint(2, size=(100, 5)) col = np.random.randint(5) - y = x[:, col].copy().reshape(100, 1) + y = x[:, col].reshape(100, 1).copy() t = Table.from_numpy(None, x, y) t = Discretize( method=discretize.EqualWidth(n=3))(t) @@ -250,7 +250,9 @@ def test_bayes(self): res = TestOnTrainingData()(t, [nb]) np.testing.assert_almost_equal(CA(res), [1]) - t.Y[-20:] = 1 - t.Y[-20:] + t = Table.from_numpy(None, t.X, t.Y.copy()) + with t.unlocked(): + t.Y[-20:] = 1 - t.Y[-20:] res = TestOnTrainingData()(t, [nb]) self.assertGreaterEqual(CA(res)[0], 0.75) self.assertLess(CA(res)[0], 1) diff --git a/Orange/tests/test_evaluation_testing.py b/Orange/tests/test_evaluation_testing.py index 561be10c6aa..6c88e4bffbe 100644 --- a/Orange/tests/test_evaluation_testing.py +++ b/Orange/tests/test_evaluation_testing.py @@ -46,7 +46,9 @@ def setUpClass(cls): cls.iris = Table('iris') cls.nrows = 200 cls.ncols = 5 - cls.random_table = random_data(cls.nrows, cls.ncols) + + def setUp(self): + self.random_table = random_data(self.nrows, self.ncols) def run_test_failed(self, method, succ_calls): # Can't use mocking helpers here (wrong result type for Majority, @@ -258,7 +260,8 @@ def test_miss_majority(): res = cv(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) - x[-4:] = np.zeros((4, 3)) + with data.unlocked(data.X): + x[-4:] = np.zeros((4, 3)) res = cv(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) @@ -335,7 +338,8 @@ def add_meta_fold(data, f): ndata = data.transform(domain) vals = np.tile(range(f), len(data)//f + 1)[:len(data)] vals = vals.reshape((-1, 1)) - ndata[:, fat] = vals + with ndata.unlocked(ndata.metas): + ndata[:, fat] = vals return ndata def test_init(self): @@ -358,7 +362,8 @@ def test_unknown(self): t = self.random_table t = self.add_meta_fold(t, 3) fat = t.domain.metas[0] - t[0][fat] = float("nan") + with t.unlocked(t.metas): + t[0][fat] = float("nan") res = CrossValidationFeature(feature=fat)(t, [NaiveBayesLearner()]) self.assertNotIn(0, res.row_indices) @@ -440,11 +445,13 @@ def test_miss_majority(): res = LeaveOneOut()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) - x[49] = 0 + with data.unlocked(data.X): + x[49] = 0 res = LeaveOneOut()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) - x[25:] = 1 + with data.unlocked(data.X): + x[25:] = 1 data = Table.from_numpy(None, x, y) res = LeaveOneOut()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0], @@ -516,11 +523,13 @@ def test_miss_majority(): res = TestOnTrainingData()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) - x[49] = 0 + with data.unlocked(data.X): + x[49] = 0 res = TestOnTrainingData()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) - x[25:] = 1 + with data.unlocked(data.X): + x[25:] = 1 data = Table.from_numpy(None, x, y) res = TestOnTrainingData()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0], res.predicted[0][0]) @@ -604,11 +613,13 @@ def test_miss_majority(): res = TestOnTrainingData()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) - x[49] = 0 + with data.unlocked(data.X): + x[49] = 0 res = TestOnTrainingData()(data, [MajorityLearner()]) np.testing.assert_equal(res.predicted[0][:49], 0) - x[25:] = 1 + with data.unlocked(data.X): + x[25:] = 1 y = x[:, -1] data = Table.from_numpy(None, x, y) res = TestOnTrainingData()(data, [MajorityLearner()]) diff --git a/Orange/tests/test_filter.py b/Orange/tests/test_filter.py index 325b4ae6d3a..b8cdd43957d 100644 --- a/Orange/tests/test_filter.py +++ b/Orange/tests/test_filter.py @@ -356,10 +356,12 @@ def test_operators(self): flt = FilterString("name", FilterString.IsDefined) self.assertTrue(flt(self.inst)) for s in ["?", "nan"]: - self.inst["name"] = s + with self.data.unlocked(): + self.inst["name"] = s flt = FilterString("name", FilterString.IsDefined) self.assertTrue(flt(self.inst)) - self.inst["name"] = "" + with self.data.unlocked(): + self.inst["name"] = "" flt = FilterString("name", FilterString.IsDefined) self.assertFalse(flt(self.inst)) diff --git a/Orange/tests/test_freeviz.py b/Orange/tests/test_freeviz.py index 4e07119359b..5504e76c0fa 100644 --- a/Orange/tests/test_freeviz.py +++ b/Orange/tests/test_freeviz.py @@ -18,7 +18,8 @@ def setUpClass(cls): def test_basic(self): table = self.iris.copy() - table[3, 3] = np.nan + with table.unlocked(): + table[3, 3] = np.nan freeviz = FreeViz() model = freeviz(table) proj = model(table) diff --git a/Orange/tests/test_impute.py b/Orange/tests/test_impute.py index 3b3ec9be3fa..74e328780b0 100644 --- a/Orange/tests/test_impute.py +++ b/Orange/tests/test_impute.py @@ -208,7 +208,8 @@ def test_sparse(self): """ table = self._create_table() domain = table.domain - table.X = sp.csr_matrix(table.X) + with table.unlocked(): + table.X = sp.csr_matrix(table.X) v1, v2 = impute.AsValue()(table, domain[1]) self.assertTrue(np.all(np.isfinite(v2.compute_value(table)))) diff --git a/Orange/tests/test_majority.py b/Orange/tests/test_majority.py index 4b41acff84c..484f7c3240b 100644 --- a/Orange/tests/test_majority.py +++ b/Orange/tests/test_majority.py @@ -49,14 +49,17 @@ def test_empty(self): def test_missing(self): iris = Table('iris') learn = MajorityLearner() - for e in iris[: len(iris) // 2: 2]: - e.set_class("?") + sub_table = iris[: len(iris) // 2: 2] + with sub_table.unlocked(): + for e in sub_table: + e.set_class("?") clf = learn(iris) y = clf(iris) self.assertTrue((y == 2).all()) - for e in iris: - e.set_class("?") + with iris.unlocked(): + for e in iris: + e.set_class("?") clf = learn(iris) y = clf(iris) self.assertEqual(y.all(), 1) diff --git a/Orange/tests/test_normalize.py b/Orange/tests/test_normalize.py index d58e9daae9f..c35f98acfde 100644 --- a/Orange/tests/test_normalize.py +++ b/Orange/tests/test_normalize.py @@ -115,11 +115,12 @@ def test_normalize_sparse(self): self.assertEqual((normalized.X != solution).nnz, 0) # raise error for non-zero offsets - data.X = sp.csr_matrix(np.array([ - [0, 0, 0], - [0, 1, 3], - [0, 2, 4], - ])) + with data.unlocked(): + data.X = sp.csr_matrix(np.array([ + [0, 0, 0], + [0, 1, 3], + [0, 2, 4], + ])) with self.assertRaises(ValueError): normalizer(data) diff --git a/Orange/tests/test_preprocess.py b/Orange/tests/test_preprocess.py index b9076894ec7..f1ce7327813 100644 --- a/Orange/tests/test_preprocess.py +++ b/Orange/tests/test_preprocess.py @@ -69,7 +69,8 @@ def test_nothing_to_remove(self): class TestRemoveNaNRows(unittest.TestCase): def test_remove_row(self): data = Table("iris") - data.X[0, 0] = np.nan + with data.unlocked(): + data.X[0, 0] = np.nan pp_data = RemoveNaNRows()(data) self.assertEqual(len(pp_data), len(data) - 1) self.assertFalse(np.isnan(pp_data.X).any()) @@ -78,21 +79,24 @@ def test_remove_row(self): class TestRemoveNaNColumns(unittest.TestCase): def test_column_filtering(self): data = Table("iris") - data.X[:, (1, 3)] = np.NaN + with data.unlocked(): + data.X[:, (1, 3)] = np.NaN new_data = RemoveNaNColumns()(data) self.assertEqual(len(new_data.domain.attributes), len(data.domain.attributes) - 2) data = Table("iris") - data.X[0, 0] = np.NaN + with data.unlocked(): + data.X[0, 0] = np.NaN new_data = RemoveNaNColumns()(data) self.assertEqual(len(new_data.domain.attributes), len(data.domain.attributes)) def test_column_filtering_sparse(self): data = Table("iris") - data.X = csr_matrix(data.X) + with data.unlocked(): + data.X = csr_matrix(data.X) new_data = RemoveNaNColumns()(data) self.assertEqual(data, new_data) @@ -169,7 +173,8 @@ def test_dense_pps(self): np.testing.assert_array_equal(out, true_out) def test_sparse_pps(self): - self.data.X = csr_matrix(self.data.X) + with self.data.unlocked(): + self.data.X = csr_matrix(self.data.X) out = AdaptiveNormalize()(self.data) true_out = Scale(center=Scale.NoCentering, scale=Scale.Span)(self.data) np.testing.assert_array_equal(out, true_out) @@ -183,9 +188,11 @@ def setUp(self): self.data = Table.from_numpy(domain, np.zeros((3, 2))) def test_0_dense(self): - self.data[1:, 1] = 7 - true_out = self.data[:, 1] - true_out.X = true_out.X.reshape(-1, 1) + with self.data.unlocked(): + self.data[1:, 1] = 7 + true_out = self.data[:, 1].copy() + with true_out.unlocked(true_out.X): + true_out.X = true_out.X.reshape(-1, 1) out = RemoveSparse(0.5, True)(self.data) np.testing.assert_array_equal(out, true_out) @@ -193,10 +200,12 @@ def test_0_dense(self): np.testing.assert_array_equal(out, true_out) def test_0_sparse(self): - self.data[1:, 1] = 7 - true_out = self.data[:, 1] - self.data.X = csr_matrix(self.data.X) - true_out.X = csr_matrix(true_out.X) + with self.data.unlocked(): + self.data[1:, 1] = 7 + true_out = self.data[:, 1].copy() + self.data.X = csr_matrix(self.data.X) + with true_out.unlocked(true_out.X): + true_out.X = csr_matrix(true_out.X) out = RemoveSparse(0.5, True)(self.data).X np.testing.assert_array_equal(out, true_out) @@ -204,10 +213,12 @@ def test_0_sparse(self): np.testing.assert_array_equal(out, true_out) def test_nan_dense(self): - self.data[1:, 1] = np.nan - self.data.X[:, 0] = 7 - true_out = self.data[:, 0] - true_out.X = true_out.X.reshape(-1, 1) + with self.data.unlocked(): + self.data[1:, 1] = np.nan + self.data.X[:, 0] = 7 + true_out = self.data[:, 0].copy() + with true_out.unlocked(true_out.X): + true_out.X = true_out.X.reshape(-1, 1) out = RemoveSparse(0.5, False)(self.data) np.testing.assert_array_equal(out, true_out) @@ -215,12 +226,14 @@ def test_nan_dense(self): np.testing.assert_array_equal(out, true_out) def test_nan_sparse(self): - self.data[1:, 1] = np.nan - self.data.X[:, 0] = 7 - true_out = self.data[:, 0] - true_out.X = true_out.X.reshape(-1, 1) - self.data.X = csr_matrix(self.data.X) - true_out.X = csr_matrix(true_out.X) + with self.data.unlocked(): + self.data[1:, 1] = np.nan + self.data.X[:, 0] = 7 + true_out = self.data[:, 0].copy() + with true_out.unlocked(true_out.X): + true_out.X = true_out.X.reshape(-1, 1) + true_out.X = csr_matrix(true_out.X) + self.data.X = csr_matrix(self.data.X) out = RemoveSparse(0.5, False)(self.data) np.testing.assert_array_equal(out, true_out) diff --git a/Orange/tests/test_radviz.py b/Orange/tests/test_radviz.py index 27e817f9ee6..7bd658fcbb1 100644 --- a/Orange/tests/test_radviz.py +++ b/Orange/tests/test_radviz.py @@ -11,7 +11,8 @@ class TestRadViz(unittest.TestCase): @classmethod def setUpClass(cls): cls.iris = Table("iris") - cls.iris[3, 3] = np.nan + with cls.iris.unlocked(): + cls.iris[3, 3] = np.nan cls.titanic = Table("titanic") def test_radviz(self): diff --git a/Orange/tests/test_score_feature.py b/Orange/tests/test_score_feature.py index 1e27c872e9a..97a48e0baa2 100644 --- a/Orange/tests/test_score_feature.py +++ b/Orange/tests/test_score_feature.py @@ -114,7 +114,8 @@ def test_relieff(self): # some leeway for randomness in relieff random instance selection self.assertIn('tear_rate', found) # Ensure it doesn't crash on missing target class values - old_breast.Y[0] = np.nan + with old_breast.unlocked(): + old_breast.Y[0] = np.nan weights = ReliefF()(old_breast, None) np.testing.assert_array_equal( diff --git a/Orange/tests/test_softmax_regression.py b/Orange/tests/test_softmax_regression.py index 87c554d68bf..77b40dd45db 100644 --- a/Orange/tests/test_softmax_regression.py +++ b/Orange/tests/test_softmax_regression.py @@ -23,8 +23,9 @@ def test_SoftmaxRegression(self): def test_SoftmaxRegressionPreprocessors(self): table = self.iris.copy() - table.X[:, 2] = table.X[:, 2] * 0.001 - table.X[:, 3] = table.X[:, 3] * 0.001 + with table.unlocked(): + table.X[:, 2] = table.X[:, 2] * 0.001 + table.X[:, 3] = table.X[:, 3] * 0.001 learners = [SoftmaxRegressionLearner(preprocessors=[]), SoftmaxRegressionLearner()] cv = CrossValidation(k=10) diff --git a/Orange/tests/test_sparse_table.py b/Orange/tests/test_sparse_table.py index d4a1dbcfee6..0f6ada90b43 100644 --- a/Orange/tests/test_sparse_table.py +++ b/Orange/tests/test_sparse_table.py @@ -33,13 +33,15 @@ def test_value_assignment(self): def test_str(self): iris = Table('iris') - iris.X, iris.Y = csr_matrix(iris.X), csr_matrix(iris.Y) + with iris.unlocked(): + iris.X, iris.Y = csr_matrix(iris.X), csr_matrix(iris.Y) str(iris) def test_Y_setter_1d(self): iris = Table('iris') assert iris.Y.shape == (150,) - iris.Y = csr_matrix(iris.Y) + with iris.unlocked(): + iris.Y = csr_matrix(iris.Y) # We expect the Y shape to match the X shape, which is (150, 4) in iris self.assertEqual(iris.Y.shape, (150,)) @@ -48,8 +50,9 @@ def test_Y_setter_2d(self): assert iris.Y.shape == (150,) # Convert iris.Y to (150, 1) shape new_y = iris.Y[:, np.newaxis] - iris.Y = np.hstack((new_y, new_y)) - iris.Y = csr_matrix(iris.Y) + with iris.unlocked(): + iris.Y = np.hstack((new_y, new_y)) + iris.Y = csr_matrix(iris.Y) # We expect the Y shape to match the X shape, which is (150, 4) in iris self.assertEqual(iris.Y.shape, (150, 2)) @@ -57,7 +60,8 @@ def test_Y_setter_2d_single_instance(self): iris = Table('iris')[:1] # Convert iris.Y to (1, 1) shape new_y = iris.Y[:, np.newaxis] - iris.Y = np.hstack((new_y, new_y)) - iris.Y = csr_matrix(iris.Y) + with iris.unlocked(): + iris.Y = np.hstack((new_y, new_y)) + iris.Y = csr_matrix(iris.Y) # We expect the Y shape to match the X shape, which is (1, 4) in iris self.assertEqual(iris.Y.shape, (1, 2)) diff --git a/Orange/tests/test_svm.py b/Orange/tests/test_svm.py index 63f459880f8..372c3561286 100644 --- a/Orange/tests/test_svm.py +++ b/Orange/tests/test_svm.py @@ -18,7 +18,8 @@ class TestSVMLearner(unittest.TestCase): @classmethod def setUpClass(cls): cls.data = Table(test_filename('datasets/ionosphere.tab')) - cls.data.shuffle() + with cls.data.unlocked(): + cls.data.shuffle() def test_SVM(self): learn = SVMLearner() diff --git a/Orange/tests/test_table.py b/Orange/tests/test_table.py index db18026d09c..c75e49cd551 100644 --- a/Orange/tests/test_table.py +++ b/Orange/tests/test_table.py @@ -160,48 +160,65 @@ def test_indexing_assign_value(self): metaa = d.domain.index("a") self.assertEqual(d[0, "a"], "A") - d[0, "a"] = "B" + + with d.unlocked(): + d[0, "a"] = "B" self.assertEqual(d[0, "a"], "B") - d[0]["a"] = "A" + with d.unlocked(): + d[0]["a"] = "A" self.assertEqual(d[0, "a"], "A") - d[0, vara] = "B" + with d.unlocked(): + d[0, vara] = "B" self.assertEqual(d[0, "a"], "B") - d[0][vara] = "A" + with d.unlocked(): + d[0][vara] = "A" self.assertEqual(d[0, "a"], "A") - d[0, metaa] = "B" + with d.unlocked(): + d[0, metaa] = "B" self.assertEqual(d[0, "a"], "B") - d[0][metaa] = "A" + with d.unlocked(): + d[0][metaa] = "A" self.assertEqual(d[0, "a"], "A") - d[0, np.int_(metaa)] = "B" + with d.unlocked(): + d[0, np.int_(metaa)] = "B" self.assertEqual(d[0, "a"], "B") - d[0][np.int_(metaa)] = "A" + with d.unlocked(): + d[0][np.int_(metaa)] = "A" self.assertEqual(d[0, "a"], "A") # regular varb = d.domain["b"] self.assertEqual(d[0, "b"], 0) - d[0, "b"] = 42 + with d.unlocked(): + d[0, "b"] = 42 self.assertEqual(d[0, "b"], 42) - d[0]["b"] = 0 + with d.unlocked(): + d[0]["b"] = 0 self.assertEqual(d[0, "b"], 0) - d[0, varb] = 42 + with d.unlocked(): + d[0, varb] = 42 self.assertEqual(d[0, "b"], 42) - d[0][varb] = 0 + with d.unlocked(): + d[0][varb] = 0 self.assertEqual(d[0, "b"], 0) - d[0, 0] = 42 + with d.unlocked(): + d[0, 0] = 42 self.assertEqual(d[0, "b"], 42) - d[0][0] = 0 + with d.unlocked(): + d[0][0] = 0 self.assertEqual(d[0, "b"], 0) - d[0, np.int_(0)] = 42 + with d.unlocked(): + d[0, np.int_(0)] = 42 self.assertEqual(d[0, "b"], 42) - d[0][np.int_(0)] = 0 + with d.unlocked(): + d[0][np.int_(0)] = 0 self.assertEqual(d[0, "b"], 0) def test_indexing_assign_example(self): @@ -216,31 +233,39 @@ def almost_equal_list(s, t): d = data.Table("datasets/test2") self.assertFalse(isnan(d[0, "a"])) - d[0] = ["3.14", "1", "f"] + with d.unlocked(): + d[0] = ["3.14", "1", "f"] almost_equal_list(d[0].values(), [3.14, "1", "f"]) self.assertTrue(isnan(d[0, "a"])) - d[0] = [3.15, 1, "t"] + + with d.unlocked(): + d[0] = [3.15, 1, "t"] almost_equal_list(d[0].values(), [3.15, "0", "t"]) - d[np.int_(0)] = [3.15, 2, "f"] + + with d.unlocked(): + d[np.int_(0)] = [3.15, 2, "f"] almost_equal_list(d[0].values(), [3.15, 2, "f"]) - with self.assertRaises(ValueError): + with d.unlocked(), self.assertRaises(ValueError): d[0] = ["3.14", "1"] - with self.assertRaises(ValueError): + with d.unlocked(), self.assertRaises(ValueError): d[np.int_(0)] = ["3.14", "1"] ex = data.Instance(d.domain, ["3.16", "1", "f"]) - d[0] = ex + with d.unlocked(): + d[0] = ex almost_equal_list(d[0].values(), [3.16, "1", "f"]) ex = data.Instance(d.domain, ["3.16", 2, "t"]) - d[np.int_(0)] = ex + with d.unlocked(): + d[np.int_(0)] = ex almost_equal_list(d[0].values(), [3.16, 2, "t"]) ex = data.Instance(d.domain, ["3.16", "1", "f"]) ex["e"] = "mmmapp" - d[0] = ex + with d.unlocked(): + d[0] = ex almost_equal_list(d[0].values(), [3.16, "1", "f"]) self.assertEqual(d[0, "e"], "mmmapp") @@ -272,17 +297,21 @@ def test_assign_slice_value(self): with warnings.catch_warnings(): warnings.simplefilter("ignore") d = data.Table("datasets/test2") - d[2:5, 0] = 42 + with d.unlocked(): + d[2:5, 0] = 42 self.assertEqual([e[0] for e in d], [0, 1.1, 42, 42, 42, 2.25, 2.26, 3.333, Unknown]) - d[:3, "b"] = 43 + with d.unlocked(): + d[:3, "b"] = 43 self.assertEqual([e[0] for e in d], [43, 43, 43, 42, 42, 2.25, 2.26, 3.333, None]) - d[-2:, d.domain[0]] = 44 + with d.unlocked(): + d[-2:, d.domain[0]] = 44 self.assertEqual([e[0] for e in d], [43, 43, 43, 42, 42, 2.25, 2.26, 44, 44]) - d[2:5, "a"] = "A" + with d.unlocked(): + d[2:5, "a"] = "A" self.assertEqual([e["a"] for e in d], list("ABAAACCDE")) def test_multiple_indices(self): @@ -308,11 +337,13 @@ def test_assign_multiple_indices_value(self): warnings.simplefilter("ignore") d = data.Table("datasets/test2") - d[1:4, "b"] = 42 + with d.unlocked(): + d[1:4, "b"] = 42 self.assertEqual([e[0] for e in d], [0, 42, 42, 42, 2.24, 2.25, 2.26, 3.333, None]) - d[range(5, 2, -1), "b"] = None + with d.unlocked(): + d[range(5, 2, -1), "b"] = None self.assertEqual([e[d.domain[0]] for e in d], [0, 42, 42, None, "?", "", 2.26, 3.333, None]) @@ -324,7 +355,8 @@ def test_set_multiple_indices_example(self): d = data.Table("datasets/test2") vals = [e[0] for e in d] - d[[1, 2, 5]] = [42, None, None] + with d.unlocked(): + d[[1, 2, 5]] = [42, None, None] vals[1] = vals[2] = vals[5] = 42 self.assertEqual([e[0] for e in d], vals) @@ -340,16 +372,23 @@ def test_bool(self): def test_checksum(self): d = data.Table("zoo") - d[42, 3] = 0 + with d.unlocked(): + d[42, 3] = 0 crc1 = d.checksum(False) - d[42, 3] = 1 + + with d.unlocked(): + d[42, 3] = 1 crc2 = d.checksum(False) self.assertNotEqual(crc1, crc2) - d[42, 3] = 0 + + with d.unlocked(): + d[42, 3] = 0 crc3 = d.checksum(False) self.assertEqual(crc1, crc3) + _ = d[42, "name"] - d[42, "name"] = "non-animal" + with d.unlocked(): + d[42, "name"] = "non-animal" crc4 = d.checksum(False) self.assertEqual(crc1, crc4) crc4 = d.checksum(True) @@ -363,10 +402,11 @@ def test_total_weight(self): d = data.Table("zoo") self.assertEqual(d.total_weight(), len(d)) - d.set_weights(0) - d[0].weight = 0.1 - d[10].weight = 0.2 - d[-1].weight = 0.3 + with d.unlocked(): + d.set_weights(0) + d[0].weight = 0.1 + d[10].weight = 0.2 + d[-1].weight = 0.3 self.assertAlmostEqual(d.total_weight(), 0.6) def test_has_missing(self): @@ -374,15 +414,18 @@ def test_has_missing(self): self.assertFalse(d.has_missing()) self.assertFalse(d.has_missing_class()) - d[10, 3] = "?" + with d.unlocked(): + d[10, 3] = "?" self.assertTrue(d.has_missing()) self.assertFalse(d.has_missing_class()) - d[10].set_class("?") + with d.unlocked(): + d[10].set_class("?") self.assertTrue(d.has_missing()) self.assertTrue(d.has_missing_class()) - d = data.Table("datasets/test3") + with d.unlocked(): + d = data.Table("datasets/test3") self.assertFalse(d.has_missing()) self.assertFalse(d.has_missing_class()) @@ -391,19 +434,22 @@ def test_shuffle(self): crc = d.checksum() names = set(str(x["name"]) for x in d) - d.shuffle() + with d.unlocked(): + d.shuffle() self.assertNotEqual(crc, d.checksum()) self.assertSetEqual(names, set(str(x["name"]) for x in d)) crc2 = d.checksum() x = d[2:10] crcx = x.checksum() - d.shuffle() + with d.unlocked(): + d.shuffle() self.assertNotEqual(crc2, d.checksum()) self.assertEqual(crcx, x.checksum()) crc2 = d.checksum() - x.shuffle() + with x.unlocked(): + x.shuffle() self.assertNotEqual(crcx, x.checksum()) self.assertEqual(crc2, d.checksum()) @@ -443,7 +489,8 @@ def test_copy(self): self.assertTrue(np.all(t.X == copy.X)) self.assertTrue(np.all(t.Y == copy.Y)) self.assertTrue(np.all(t.metas == copy.metas)) - copy[0] = [1, 1, 1, 1, 1, 1, 1, 1] + with copy.unlocked(): + copy[0] = [1, 1, 1, 1] self.assertFalse(np.all(t.X == copy.X)) self.assertFalse(np.all(t.Y == copy.Y)) self.assertFalse(np.all(t.metas == copy.metas)) @@ -461,7 +508,8 @@ def test_copy_sparse(self): self.assertNotEqual(id(t.metas), id(copy.metas)) # ensure that copied sparse arrays do not share data - t.X[0, 0] = 42 + with t.unlocked(): + t.X[0, 0] = 42 self.assertEqual(copy.X[0, 0], 5.1) def test_concatenate(self): @@ -528,7 +576,8 @@ def test_concatenate(self): self.assertEqual(t123.name, "t2") self.assertEqual(t123.attributes, {"a": 42, "c": 43, "b": 45}) - t2.Y = np.atleast_2d(t2.Y).T + with t2.unlocked(t2.Y): + t2.Y = np.atleast_2d(t2.Y).T t12 = data.Table.concatenate((t1, t2)) self.assertEqual(t12.domain, t1.domain) np.testing.assert_almost_equal(t12.X, np.vstack((x1, x2))) @@ -549,7 +598,8 @@ def test_concatenate_exceptions(self): def test_concatenate_sparse(self): iris = Table("iris") - iris.X = sp.csc_matrix(iris.X) + with iris.unlocked(): + iris.X = sp.csc_matrix(iris.X) new = Table.concatenate([iris, iris]) self.assertEqual(len(new), 300) self.assertTrue(sp.issparse(new.X), "Concatenated X is not sparse.") @@ -639,7 +689,8 @@ def test_saveTab(self): os.remove("test-zoo.tab.metadata") d = data.Table("zoo") - d.set_weights(range(len(d))) + with d.unlocked(): + d.set_weights(range(len(d))) d.save("test-zoo-weights.tab") dd = data.Table("test-zoo-weights") try: @@ -669,26 +720,27 @@ def test_save_pickle(self): os.remove("iris.pickle") def test_from_numpy(self): - a = np.arange(20, dtype="d").reshape((4, 5)) + a = np.arange(20, dtype="d").reshape((4, 5)).copy() a[:, -1] = [0, 0, 0, 1] dom = data.Domain([data.ContinuousVariable(x) for x in "abcd"], data.DiscreteVariable("e", values=("no", "yes"))) table = data.Table(dom, a) - for i in range(4): - self.assertEqual(table[i].get_class(), "no" if i < 3 else "yes") - for j in range(5): - self.assertEqual(a[i, j], table[i, j]) - table[i, j] = random.random() - self.assertEqual(a[i, j], table[i, j]) - - with self.assertRaises(IndexError): + with table.unlocked(): + for i in range(4): + self.assertEqual(table[i].get_class(), "no" if i < 3 else "yes") + for j in range(5): + self.assertEqual(a[i, j], table[i, j]) + + with table.unlocked(), self.assertRaises(IndexError): table[0, -5] = 5 def test_filter_is_defined(self): d = data.Table("iris") - d[1, 4] = Unknown + with d.unlocked(): + d[1, 4] = Unknown self.assertTrue(isnan(d[1, 4])) - d[140, 0] = Unknown + with d.unlocked(): + d[140, 0] = Unknown e = filter.IsDefined()(d) self.assertEqual(len(e), len(d) - 2) self.assertEqual(e[0], d[0]) @@ -699,9 +751,11 @@ def test_filter_is_defined(self): def test_filter_has_class(self): d = data.Table("iris") - d[1, 4] = Unknown + with d.unlocked(): + d[1, 4] = Unknown self.assertTrue(isnan(d[1, 4])) - d[140, 0] = Unknown + with d.unlocked(): + d[140, 0] = Unknown e = filter.HasClass()(d) self.assertEqual(len(e), len(d) - 1) self.assertEqual(e[0], d[0]) @@ -817,7 +871,8 @@ def test_filter_value_continuous(self): x = filter.Values([f])(d) self.assertEqual(len(x), len(d)) - d[:30, v.petal_length] = Unknown + with d.unlocked(): + d[:30, v.petal_length] = Unknown x = filter.Values([f])(d) self.assertEqual(len(x), len(d) - 30) @@ -893,7 +948,8 @@ def test_valueFilter_discrete(self): f = filter.FilterDiscrete(v.hair, values=None) self.assertEqual(len(filter.Values([f])(d)), len(d)) - d[:5, v.hair] = Unknown + with d.unlocked(): + d[:5, v.hair] = Unknown self.assertEqual(len(filter.Values([f])(d)), len(d) - 5) def test_valueFilter_string_is_defined(self): @@ -979,7 +1035,8 @@ def test_valueFilter_string_case_sens(self): def test_valueFilter_string_case_insens(self): d = data.Table("zoo") - d[d[:, "name"].metas[:, 0] == "girl", "name"] = "GIrl" + with d.unlocked(): + d[d[:, "name"].metas[:, 0] == "girl", "name"] = "GIrl" col = d[:, "name"].metas[:, 0] @@ -1111,12 +1168,14 @@ def test_is_sparse(self): table = data.Table("iris") self.assertFalse(table.is_sparse()) - table.X = sp.csr_matrix(table.X) - self.assertTrue(table.is_sparse()) + with table.unlocked(): + table.X = sp.csr_matrix(table.X) + self.assertTrue(table.is_sparse()) def test_repr_sparse_with_one_row(self): table = data.Table("iris")[:1] - table.X = sp.csr_matrix(table.X) + with table.unlocked(): + table.X = sp.csr_matrix(table.X) repr(table) # make sure repr does not crash def test_inf(self): @@ -1152,11 +1211,13 @@ def setUp(self): def mock_domain(self, with_classes=False, with_metas=False): attributes = self.attributes + class_var = self.class_vars[0] if with_classes else None class_vars = self.class_vars if with_classes else [] metas = self.metas if with_metas else [] variables = attributes + class_vars return MagicMock(data.Domain, attributes=attributes, + class_var=class_var, class_vars=class_vars, metas=metas, variables=variables) @@ -1971,20 +2032,24 @@ def setUp(self): data.Table(self.domain, self.data, self.class_data, self.meta_data) def test_can_assign_values(self): - self.table[0, 0] = 42. + with self.table.unlocked(): + self.table[0, 0] = 42. self.assertAlmostEqual(self.table.X[0, 0], 42.) def test_can_assign_values_to_classes(self): a, _, _ = column_sizes(self.table) - self.table[0, a] = 42. + with self.table.unlocked(): + self.table[0, a] = 42. self.assertAlmostEqual(self.table.Y[0], 42.) def test_can_assign_values_to_metas(self): - self.table[0, -1] = 42. + with self.table.unlocked(): + self.table[0, -1] = 42. self.assertAlmostEqual(self.table.metas[0, 0], 42.) def test_can_assign_rows_to_rows(self): - self.table[0] = self.table[1] + with self.table.unlocked(): + self.table[0] = self.table[1] np.testing.assert_almost_equal( self.table.X[0], self.table.X[1]) np.testing.assert_almost_equal( @@ -1996,7 +2061,8 @@ def test_can_assign_lists(self): a, _, _ = column_sizes(self.table) new_example = [float(i) for i in range(len(self.attributes + self.class_vars))] - self.table[0] = new_example + with self.table.unlocked(): + self.table[0] = new_example np.testing.assert_almost_equal( self.table.X[0], np.array(new_example[:a])) np.testing.assert_almost_equal( @@ -2007,7 +2073,8 @@ def test_can_assign_np_array(self): new_example = \ np.array([float(i) for i in range(len(self.attributes + self.class_vars))]) - self.table[0] = new_example + with self.table.unlocked(): + self.table[0] = new_example np.testing.assert_almost_equal(self.table.X[0], new_example[:a]) np.testing.assert_almost_equal(self.table.Y[0], new_example[a:]) @@ -2081,17 +2148,19 @@ def test_value_indexing(self): def test_row_assignment(self): new_value = 2. - for i in range(self.nrows): - new_row = [new_value] * len(self.data[i]) - self.table[i] = np.array(new_row) - self.assertEqual(list(self.table[i]), new_row) + with self.table.unlocked(): + for i in range(self.nrows): + new_row = [new_value] * len(self.data[i]) + self.table[i] = np.array(new_row) + self.assertEqual(list(self.table[i]), new_row) def test_value_assignment(self): new_value = 0. - for i in range(self.nrows): - for j in range(len(self.table[i])): - self.table[i, j] = new_value - self.assertEqual(self.table[i, j], new_value) + with self.table.unlocked(): + for i in range(self.nrows): + for j in range(len(self.table[i])): + self.table[i, j] = new_value + self.assertEqual(self.table[i, j], new_value) def test_subclasses(self): from pathlib import Path @@ -2114,15 +2183,18 @@ def test_get_nan_frequency(self): self.assertEqual(table.get_nan_frequency_attribute(), 0) self.assertEqual(table.get_nan_frequency_class(), 0) - table.X[1, 2] = table.X[4, 5] = np.nan + with table.unlocked(): + table.X[1, 2] = table.X[4, 5] = np.nan self.assertEqual(table.get_nan_frequency_attribute(), 2 / table.X.size) self.assertEqual(table.get_nan_frequency_class(), 0) - table.Y[3:6] = np.nan + with table.unlocked(): + table.Y[3:6] = np.nan self.assertEqual(table.get_nan_frequency_attribute(), 2 / table.X.size) self.assertEqual(table.get_nan_frequency_class(), 3 / table.Y.size) - table.X[1, 2] = table.X[4, 5] = 0 + with table.unlocked(): + table.X[1, 2] = table.X[4, 5] = 0 self.assertEqual(table.get_nan_frequency_attribute(), 0) self.assertEqual(table.get_nan_frequency_class(), 3 / table.Y.size) @@ -2139,45 +2211,56 @@ def test_assignment(self): inst = table[2] self.assertIsInstance(inst, data.RowInstance) - inst[1] = 0 + with table.unlocked(): + inst[1] = 0 self.assertEqual(table[2, 1], 0) - inst[1] = 1 + with table.unlocked(): + inst[1] = 1 self.assertEqual(table[2, 1], 1) - inst.set_class("mammal") + with table.unlocked(): + inst.set_class("mammal") self.assertEqual(table[2, len(table.domain.attributes)], "mammal") - inst.set_class("fish") + with table.unlocked(): + inst.set_class("fish") self.assertEqual(table[2, len(table.domain.attributes)], "fish") - inst[-1] = "Foo" + with table.unlocked(): + inst[-1] = "Foo" self.assertEqual(table[2, -1], "Foo") def test_iteration_with_assignment(self): table = data.Table("iris") - for i, row in enumerate(table): - row[0] = i + with table.unlocked(): + for i, row in enumerate(table): + row[0] = i np.testing.assert_array_equal(table.X[:, 0], np.arange(len(table))) def test_sparse_assignment(self): X = np.eye(4) - Y = X[2] + Y = X[2].copy() table = data.Table.from_numpy(None, X, Y) row = table[1] self.assertFalse(sp.issparse(row.sparse_x)) self.assertEqual(row[0], 0) self.assertEqual(row[1], 1) - table.X = sp.csr_matrix(table.X) - table._Y = sp.csr_matrix(table._Y) + with table.unlocked(): + table.X = sp.csr_matrix(table.X) + table.Y = sp.csr_matrix(table.Y) sparse_row = table[1] self.assertTrue(sp.issparse(sparse_row.sparse_x)) self.assertEqual(sparse_row[0], 0) self.assertEqual(sparse_row[1], 1) - sparse_row[1] = 0 + + with table.unlocked(): + sparse_row[1] = 0 self.assertEqual(sparse_row[1], 0) self.assertEqual(table.X[1, 1], 0) self.assertEqual(table[2][4], 1) - table[2][4] = 0 + + with table.unlocked(): + table[2][4] = 0 self.assertEqual(table[2][4], 0) diff --git a/Orange/widgets/tests/base.py b/Orange/widgets/tests/base.py index b4f44891927..b9610fa4979 100644 --- a/Orange/widgets/tests/base.py +++ b/Orange/widgets/tests/base.py @@ -830,7 +830,8 @@ def assertFontEqual(self, font1, font2): class AnchorProjectionWidgetTestMixin(ProjectionWidgetTestMixin): def test_embedding_missing_values(self): table = Table("heart_disease") - table.X[0] = np.nan + with table.unlocked(): + table.X[0] = np.nan self.send_signal(self.widget.Inputs.data, table) self.assertFalse(np.all(self.widget.valid_data)) output = self.get_output(ANNOTATED_DATA_SIGNAL_NAME) @@ -841,7 +842,8 @@ def test_embedding_missing_values(self): def test_sparse_data(self, timeout=DEFAULT_TIMEOUT): table = Table("iris") - table.X = sp.csr_matrix(table.X) + with table.unlocked(): + table.X = sp.csr_matrix(table.X) self.assertTrue(sp.issparse(table.X)) self.send_signal(self.widget.Inputs.data, table) self.assertTrue(self.widget.Error.sparse_data.is_shown()) @@ -852,7 +854,8 @@ def test_sparse_data(self, timeout=DEFAULT_TIMEOUT): def test_manual_move(self): data = self.data.copy() - data[1, 0] = np.nan + with data.unlocked(): + data[1, 0] = np.nan nvalid, nsample = len(self.data) - 1, self.widget.SAMPLE_SIZE self.send_signal(self.widget.Inputs.data, data) self.widget.graph.select_by_indices(list(range(0, len(data), 10))) @@ -961,7 +964,8 @@ def data_one_column_vals(cls, value=np.nan): ["", "", "", ""], "ynyn" ))) - table[:, 1] = value + with table.unlocked(): + table[:, 1] = value return table @classmethod diff --git a/Orange/widgets/utils/annotated_data.py b/Orange/widgets/utils/annotated_data.py index d48d5c62155..46149c15291 100644 --- a/Orange/widgets/utils/annotated_data.py +++ b/Orange/widgets/utils/annotated_data.py @@ -35,12 +35,14 @@ def _table_with_annotation_column(data, values, column_data, var_name): class_vars, metas = data.domain.class_vars, data.domain.metas if not data.domain.class_vars: class_vars += (var, ) + column_data = column_data.reshape((len(data), )) else: metas += (var, ) + column_data = column_data.reshape((len(data), 1)) domain = Domain(data.domain.attributes, class_vars, metas) table = data.transform(domain) with table.unlocked(table.Y if not data.domain.class_vars else table.metas): - table[:, var] = column_data.reshape((len(data), 1)) + table[:, var] = column_data return table diff --git a/Orange/widgets/visualize/owlinearprojection.py b/Orange/widgets/visualize/owlinearprojection.py index 8541969d099..c221f0a48b3 100644 --- a/Orange/widgets/visualize/owlinearprojection.py +++ b/Orange/widgets/visualize/owlinearprojection.py @@ -155,8 +155,9 @@ def normalized(a): if v.is_continuous and v is not attr_color], class_vars=attr_color ) - data = self.master.data.transform(domain) - data.X = normalized(data.X) + data = self.master.data.transform(domain, copy=True) + with data.unlocked(): + data.X = normalized(data.X) relief = ReliefF if attr_color.is_discrete else RReliefF weights = relief(n_iterations=100, k_nearest=self.minK)(data) results = sorted(zip(weights, domain.attributes), key=lambda x: (-x[0], x[1].name)) diff --git a/Orange/widgets/visualize/owsilhouetteplot.py b/Orange/widgets/visualize/owsilhouetteplot.py index b999e12aa76..1e9baede1a1 100644 --- a/Orange/widgets/visualize/owsilhouetteplot.py +++ b/Orange/widgets/visualize/owsilhouetteplot.py @@ -503,15 +503,18 @@ def commit(self): domain.attributes, domain.class_vars, domain.metas + (silhouette_var, )) - data = self.data.transform(domain) if np.count_nonzero(selectedmask): selected = self.data.from_table( domain, self.data, np.flatnonzero(selectedmask)) if selected is not None: - selected[:, silhouette_var] = np.c_[scores[selectedmask]] - data[:, silhouette_var] = np.c_[scores] + with selected.unlocked(selected.metas): + selected[:, silhouette_var] = np.c_[scores[selectedmask]] + + data = self.data.transform(domain) + with data.unlocked(data.metas): + data[:, silhouette_var] = np.c_[scores] self.Outputs.selected_data.send(selected) self.Outputs.annotated_data.send(create_annotated_table(data, indices)) diff --git a/Orange/widgets/visualize/tests/test_owboxplot.py b/Orange/widgets/visualize/tests/test_owboxplot.py index 4896051acef..21ca8c90dd2 100644 --- a/Orange/widgets/visualize/tests/test_owboxplot.py +++ b/Orange/widgets/visualize/tests/test_owboxplot.py @@ -66,21 +66,24 @@ def test_primitive_metas(self): def test_input_data_missings_cont_group_var(self): """Check widget with continuous data with missing values and group variable""" data = self.iris.copy() - data.X[:, 0] = np.nan + with data.unlocked(): + data.X[:, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) # used to crash, see #1568 def test_input_data_missings_cont_no_group_var(self): """Check widget with continuous data with missing values and no group variable""" data = self.housing - data.X[:, 0] = np.nan + with data.unlocked(): + data.X[:, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) # used to crash, see #1568 def test_input_data_missings_disc_group_var(self): """Check widget with discrete data with missing values and group variable""" data = self.zoo - data.X[:, 1] = np.nan + with data.unlocked(): + data.X[:, 1] = np.nan # This is a test and does it at its own risk: # pylint: disable=protected-access data.domain.attributes[1]._values = [] @@ -93,7 +96,8 @@ def test_input_data_missings_disc_no_group_var(self): """Check widget discrete data with missing values and no group variable""" data = self.zoo data.domain.class_var = ContinuousVariable("cls") - data.X[:, 1] = np.nan + with data.unlocked(): + data.X[:, 1] = np.nan # This is a test and does it at its own risk: # pylint: disable=protected-access data.domain.attributes[1]._values = [] @@ -253,7 +257,8 @@ def test_empty_groups(self): # select rows with US State equal to TX or MO use_indexes = np.array([0, 1, 25, 26, 27]) - table.X = table.X[use_indexes] + with table.unlocked(): + table.X = table.X[use_indexes] self.send_signal(self.widget.Inputs.data, table) self.assertEqual(2, len(self.widget.boxes)) diff --git a/Orange/widgets/visualize/tests/test_owdistributions.py b/Orange/widgets/visualize/tests/test_owdistributions.py index 22eddc30cc2..023b239f2dd 100644 --- a/Orange/widgets/visualize/tests/test_owdistributions.py +++ b/Orange/widgets/visualize/tests/test_owdistributions.py @@ -213,9 +213,10 @@ def test_switch_cvar(self): y = self.iris.domain.class_var extra = DiscreteVariable("foo", values=("a", "b")) domain = Domain(self.iris.domain.attributes + (extra, ), y) - data = self.iris.transform(domain) - data.X[:75, -1] = 0 - data.X[75:120, -1] = 1 + data = self.iris.transform(domain, copy=True) + with data.unlocked(): + data.X[:75, -1] = 0 + data.X[75:120, -1] = 1 self.send_signal(widget.Inputs.data, data) self._set_var(2) self._set_cvar(y) @@ -288,11 +289,12 @@ def test_set_valid_data(self): self.assertIsNotNone(widget.valid_group_data) self.assertTrue(widget.is_valid) - X, Y = self.iris.X, self.iris.Y - X[:, 0] = np.nan - X[:50, 1] = np.nan - X[:100, 2] = np.nan - Y[75:] = np.nan + with self.iris.unlocked(): + X, Y = self.iris.X, self.iris.Y + X[:, 0] = np.nan + X[:50, 1] = np.nan + X[:100, 2] = np.nan + Y[75:] = np.nan self.send_signal(widget.Inputs.data, self.iris) self._set_var(domain[0]) diff --git a/Orange/widgets/visualize/tests/test_owheatmap.py b/Orange/widgets/visualize/tests/test_owheatmap.py index c61075f77dd..809633b8d65 100644 --- a/Orange/widgets/visualize/tests/test_owheatmap.py +++ b/Orange/widgets/visualize/tests/test_owheatmap.py @@ -143,7 +143,8 @@ def test_cluster_column_on_all_zero_column(self): # Pearson distance used for clustering of columns does not # handle all zero columns well iris = Table("iris") - iris[:, 0] = 0 + with iris.unlocked(): + iris[:, 0] = 0 self.widget.col_clustering = True self.widget.set_dataset(iris) @@ -218,7 +219,8 @@ def test_set_split_var(self): def test_set_split_var_missing(self): data = self.brown_selected[::3].copy() - data.Y[::5] = np.nan + with data.unlocked(): + data.Y[::5] = np.nan w = self.widget self.send_signal(self.widget.Inputs.data, data, widget=w) self.assertIs(w.split_by_var, data.domain.class_var) @@ -328,15 +330,17 @@ def test_row_color_annotations(self): def test_row_color_annotations_with_na(self): widget = self.widget - data = self._brown_selected_10() - data.Y[:3] = np.nan - data.metas[:3, -1] = np.nan + data = self._brown_selected_10() + with data.unlocked(): + data.Y[:3] = np.nan + data.metas[:3, -1] = np.nan self.send_signal(widget.Inputs.data, data, widget=widget) widget.set_annotation_color_var(data.domain["function"]) self.assertTrue(widget.scene.widget.right_side_colors[0].isVisible()) widget.set_annotation_color_var(data.domain["diau g"]) - data.Y[:] = np.nan - data.metas[:, -1] = np.nan + with data.unlocked(): + data.Y[:] = np.nan + data.metas[:, -1] = np.nan self.send_signal(widget.Inputs.data, data, widget=widget) widget.set_annotation_color_var(data.domain["function"]) widget.set_annotation_color_var(data.domain["diau g"]) @@ -359,15 +363,17 @@ def test_col_color_annotations(self): def test_col_color_annotations_with_na(self): widget = self.widget data = self._brown_selected_10() - data.Y[:3] = np.nan - data.metas[:3, -1] = np.nan + with data.unlocked(): + data.Y[:3] = np.nan + data.metas[:3, -1] = np.nan data_t = data.transpose(data) self.send_signal(widget.Inputs.data, data_t, widget=widget) widget.set_column_annotation_color_var(data.domain["function"]) self.assertTrue(widget.scene.widget.top_side_colors[0].isVisible()) widget.set_column_annotation_color_var(data.domain["diau g"]) - data.Y[:] = np.nan - data.metas[:, -1] = np.nan + with data.unlocked(): + data.Y[:] = np.nan + data.metas[:, -1] = np.nan data_t = data.transpose(data) self.send_signal(widget.Inputs.data, data_t, widget=widget) widget.set_column_annotation_color_var(data.domain["function"]) diff --git a/Orange/widgets/visualize/tests/test_owlinearprojection.py b/Orange/widgets/visualize/tests/test_owlinearprojection.py index b0617355c61..820739bbaab 100644 --- a/Orange/widgets/visualize/tests/test_owlinearprojection.py +++ b/Orange/widgets/visualize/tests/test_owlinearprojection.py @@ -38,8 +38,9 @@ def test_nan_plot(self): simulate.combobox_run_through_all(self.widget.controls.attr_color) simulate.combobox_run_through_all(self.widget.controls.attr_size) - data.X[:, 0] = np.nan - data.Y[:] = np.nan + with data.unlocked(): + data.X[:, 0] = np.nan + data.Y[:] = np.nan self.send_signal(self.widget.Inputs.data, data) self.send_signal(self.widget.Inputs.data_subset, data[2:3]) simulate.combobox_run_through_all(self.widget.controls.attr_color) @@ -108,7 +109,8 @@ def assertErrorShown(data, is_shown): self.assertEqual(is_shown, self.widget.Error.no_valid_data.is_shown()) data = Table("iris")[::30] - data[:, 0] = np.nan + with data.unlocked(): + data[:, 0] = np.nan for data, is_shown in zip([None, data, Table("iris")[:30]], [False, True, False]): assertErrorShown(data, is_shown) diff --git a/Orange/widgets/visualize/tests/test_owlineplot.py b/Orange/widgets/visualize/tests/test_owlineplot.py index 0df9f81f750..2b0ced1e7c7 100644 --- a/Orange/widgets/visualize/tests/test_owlineplot.py +++ b/Orange/widgets/visualize/tests/test_owlineplot.py @@ -120,7 +120,8 @@ def test_select(self): def test_saved_selection(self): data = self.data.copy() - data[0, 0] = np.nan + with data.unlocked(): + data[0, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) mask = np.zeros(len(data) - 1, dtype=bool) mask[::10] = True @@ -185,7 +186,8 @@ def test_max_features(self): def test_data_with_missing_values(self): data = self.data.copy() - data[0, 0] = np.nan + with data.unlocked(): + data[0, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) self.assertTrue(self.widget.Information.hidden_instances.is_shown()) self.send_signal(self.widget.Inputs.data, None) diff --git a/Orange/widgets/visualize/tests/test_owprojectionwidget.py b/Orange/widgets/visualize/tests/test_owprojectionwidget.py index 9a6562b8712..83214a684b1 100644 --- a/Orange/widgets/visualize/tests/test_owprojectionwidget.py +++ b/Orange/widgets/visualize/tests/test_owprojectionwidget.py @@ -155,6 +155,7 @@ def get_embedding(self): if not len(x_data[self.valid_data]): return None + x_data = x_data.copy() x_data[x_data == np.inf] = np.nan x_data_ = np.ones(len(x_data)) y_data = np.ones(len(x_data)) @@ -177,7 +178,8 @@ def setUp(self): def test_annotation_with_nans(self): data = Table.from_table_rows(self.data, [0, 1, 2]) - data.X[1, :] = np.nan + with data.unlocked(): + data.X[1, :] = np.nan self.send_signal(self.widget.Inputs.data, data) points = self.widget.graph.scatterplot_item.points() self.widget.graph.select_by_click(None, [points[1]]) diff --git a/Orange/widgets/visualize/tests/test_owscatterplot.py b/Orange/widgets/visualize/tests/test_owscatterplot.py index 39cf81dcc2a..f4a93c27e87 100644 --- a/Orange/widgets/visualize/tests/test_owscatterplot.py +++ b/Orange/widgets/visualize/tests/test_owscatterplot.py @@ -107,7 +107,8 @@ def test_error_message(self): """Check if error message appears and then disappears when data is removed from input""" data = self.data.copy() - data.X[:, 0] = np.nan + with data.unlocked(): + data.X[:, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) self.assertTrue(self.widget.Warning.missing_coords.is_shown()) self.send_signal(self.widget.Inputs.data, None) @@ -287,7 +288,8 @@ def test_invalid_points_selection(self): "selection_group": [(i, 1) for i in range(50)]} ) data = self.data.copy()[:11] - data[0, 0] = np.nan + with data.unlocked(): + data[0, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) self.assertIsNone(self.get_output(self.widget.Outputs.selected_data)) @@ -393,12 +395,14 @@ def assert_vizrank_enabled(data, is_enabled): data1 = Table("iris")[::30] data2 = Table("iris")[::30] - data2.Y[:] = np.nan + with data2.unlocked(): + data2.Y[:] = np.nan domain = Domain( attributes=data2.domain.attributes[:4], class_vars=DiscreteVariable("iris", values=())) data2 = Table(domain, data2.X, Y=data2.Y) data3 = Table("iris")[::30] - data3.Y[:] = np.nan + with data3.unlocked(): + data3.Y[:] = np.nan for data, is_enabled in zip([data1, data2, data1, data3, data1], [True, False, True, False, True]): @@ -548,9 +552,10 @@ def test_handle_metas(self): class_vars=data.domain.class_vars, metas=data.domain.attributes[2:] ) - data = data.transform(domain) + data = data.transform(domain, copy=True) # Sometimes floats in metas are saved as objects - data.metas = data.metas.astype(object) + with data.unlocked(): + data.metas = data.metas.astype(object) self.send_signal(w.Inputs.data, data) simulate.combobox_activate_item(w.cb_attr_x, data.domain.metas[1].name) simulate.combobox_activate_item(w.controls.attr_color, data.domain.metas[0].name) @@ -590,8 +595,9 @@ def test_metas_zero_column(self): data = Table("iris") domain = data.domain domain = Domain(domain.attributes[:3], domain.class_vars, domain.attributes[3:]) - data = data.transform(domain) - data.metas[:, 0] = 0 + data = data.transform(domain, copy=True) + with data.unlocked(): + data.metas[:, 0] = 0 w = self.widget self.send_signal(w.Inputs.data, data) simulate.combobox_activate_item(w.controls.attr_x, domain.metas[0].name) @@ -661,8 +667,12 @@ def prepare_data(): data = Table("iris") values = list(range(15)) class_var = DiscreteVariable("iris5", values=[str(v) for v in values]) - data = data.transform(Domain(attributes=data.domain.attributes, class_vars=[class_var])) - data.Y = np.array(values * 10, dtype=float) + data = data.transform( + Domain(attributes=data.domain.attributes, + class_vars=[class_var]), + copy=True) + with data.unlocked(): + data.Y = np.array(values * 10, dtype=float) return data def assert_equal(data, max): @@ -673,7 +683,8 @@ def assert_equal(data, max): assert_equal(prepare_data(), MAX_COLORS) # data with nan value data = prepare_data() - data.Y[42] = np.nan + with data.unlocked(): + data.Y[42] = np.nan assert_equal(data, MAX_COLORS + 1) def test_change_data(self): @@ -830,7 +841,8 @@ def test_regression_lines_appear(self): simulate.combobox_activate_index(self.widget.controls.attr_color, 0) self.assertEqual(len(self.widget.graph.reg_line_items), 1) data = self.data.copy() - data[:, 0] = np.nan + with data.unlocked(): + data[:, 0] = np.nan self.send_signal(self.widget.Inputs.data, data) self.assertEqual(len(self.widget.graph.reg_line_items), 0) diff --git a/Orange/widgets/visualize/tests/test_owsilhouetteplot.py b/Orange/widgets/visualize/tests/test_owsilhouetteplot.py index 4791a155266..0df13d55b8f 100644 --- a/Orange/widgets/visualize/tests/test_owsilhouetteplot.py +++ b/Orange/widgets/visualize/tests/test_owsilhouetteplot.py @@ -65,7 +65,8 @@ def test_insufficient_clusters(self): def test_unknowns_in_labels(self): data = self.data[[0, 1, 2, 50, 51, 52, 100, 101, 102]] - data.Y[::3] = np.nan + with data.unlocked(data.Y): + data.Y[::3] = np.nan valid = ~np.isnan(data.Y.flatten()) self.send_signal(self.widget.Inputs.data, data) output = self.get_output(ANNOTATED_DATA_SIGNAL_NAME) @@ -87,7 +88,8 @@ def test_nan_distances(self): self.assertEqual(self.widget.Distances[self.widget.distance_idx][0], 'Cosine') data = self.data[[0, 1, 2, 50, 51, 52, 100, 101, 102]] - data.X[::3] = 0 + with data.unlocked(data.X): + data.X[::3] = 0 valid = np.any(data.X != 0, axis=1) self.assertFalse(self.widget.Warning.nan_distances.is_shown()) self.send_signal(self.widget.Inputs.data, data) diff --git a/Orange/widgets/visualize/tests/test_owvenndiagram.py b/Orange/widgets/visualize/tests/test_owvenndiagram.py index 9d025291cea..48361031c40 100644 --- a/Orange/widgets/visualize/tests/test_owvenndiagram.py +++ b/Orange/widgets/visualize/tests/test_owvenndiagram.py @@ -200,7 +200,8 @@ def test_multiple_input_over_cols(self): input2.metas) #domain matches but the values do not - input2.X = input2.X - 1 + with input2.unlocked(input2.X): + input2.X = input2.X - 1 self.send_signal(self.signal_name, input2, (2, 'Data', None)) self.widget.vennwidget.vennareas()[3].setSelected(True) annotated = self.get_output(self.widget.Outputs.annotated_data) diff --git a/Orange/widgets/visualize/utils/widget.py b/Orange/widgets/visualize/utils/widget.py index 84f03ff768a..5b34327f770 100644 --- a/Orange/widgets/visualize/utils/widget.py +++ b/Orange/widgets/visualize/utils/widget.py @@ -584,7 +584,8 @@ def _get_projection_data(self): data = self.data.transform(Domain(self.data.domain.attributes, self.data.domain.class_vars, self.data.domain.metas + variables)) - data.metas[:, -2:] = self.get_embedding() + with data.unlocked(data.metas): + data.metas[:, -2:] = self.get_embedding() return data def _get_projection_variables(self):