Table: Add locking

biolab · Jun 11, 2021 · 408fa64
1 parent f02f102
commit 408fa64
Show file tree

Hide file tree

Showing 88 changed files with 1,373 additions and 775 deletions.
diff --git a/Orange/base.py b/Orange/base.py
@@ -664,6 +664,13 @@ def __init__(self, cat_model, cat_features, domain):
         self.cat_model = cat_model
         self.cat_features = cat_features
 
+    def __call__(self, data, ret=Model.Value):
+        if isinstance(data, Table):
+            with data.force_unlocked(data.X):
+                return super().__call__(data, ret)
+        else:
+            return super().__call__(data, ret)
+
     def predict(self, X):
         if self.cat_features:
             X = X.astype(str)
@@ -824,17 +831,18 @@ def __call__(self, data, progress_callback=None):
         return m
 
     def fit_storage(self, data: Table):
-        domain, X, Y, W = data.domain, data.X, data.Y.reshape(-1), None
-        if self.supports_weights and data.has_weights():
-            W = data.W.reshape(-1)
-        # pylint: disable=not-callable
-        clf = self.__wraps__(**self.params)
-        cat_features = [i for i, attr in enumerate(domain.attributes)
-                        if attr.is_discrete]
-        if cat_features:
-            X = X.astype(str)
-        cat_model = clf.fit(X, Y, cat_features=cat_features, sample_weight=W)
-        return self.__returns__(cat_model, cat_features, domain)
+        with data.force_unlocked(data.X):
+            domain, X, Y, W = data.domain, data.X, data.Y.reshape(-1), None
+            if self.supports_weights and data.has_weights():
+                W = data.W.reshape(-1)
+            # pylint: disable=not-callable
+            clf = self.__wraps__(**self.params)
+            cat_features = [i for i, attr in enumerate(domain.attributes)
+                            if attr.is_discrete]
+            if cat_features:
+                X = X.astype(str)
+            cat_model = clf.fit(X, Y, cat_features=cat_features, sample_weight=W)
+            return self.__returns__(cat_model, cat_features, domain)
 
     def __getattr__(self, item):
         try:

diff --git a/Orange/classification/_tree_scorers.pyx b/Orange/classification/_tree_scorers.pyx
@@ -17,7 +17,7 @@ cdef extern from "numpy/npy_math.h":
 cpdef enum:
     NULL_BRANCH = -1
 
-def contingency(double[:] x, int nx, double[:] y, int ny):
+def contingency(const double[:] x, int nx, const double[:] y, int ny):
     cdef:
         np.ndarray[np.uint32_t, ndim=2] cont = np.zeros((ny, nx), dtype=np.uint32)
         int n = len(x), yi, xi
@@ -28,7 +28,8 @@ def contingency(double[:] x, int nx, double[:] y, int ny):
             cont[yi, xi] += 1
     return cont
 
-def find_threshold_entropy(double[:] x, double[:] y, np.intp_t[:] idx,
+def find_threshold_entropy(const double[:] x, const double[:] y,
+                           const np.intp_t[:] idx,
                            int n_classes, int min_leaf):
     """
     Find the threshold for continuous attribute values that maximizes
@@ -89,8 +90,9 @@ def find_threshold_entropy(double[:] x, double[:] y, np.intp_t[:] idx,
     return (class_entro - best_entro) / N / log(2), x[idx[best_idx]]
 
 
-def find_binarization_entropy(double[:, :] cont, double[:] class_distr,
-                              double[:] val_distr, int min_leaf):
+def find_binarization_entropy(const double[:, :] cont,
+                              const double[:] class_distr,
+                              const double[:] val_distr, int min_leaf):
     """
     Find the split of discrete values into two groups that optimizes information
     gain.
@@ -187,7 +189,9 @@ def find_binarization_entropy(double[:, :] cont, double[:] class_distr,
     return (class_entro - best_entro) / N / log(2), best_mapping
 
 
-def find_threshold_MSE(double[:] x, double[:] y, np.intp_t[:] idx, int min_leaf):
+def find_threshold_MSE(const double[:] x,
+                       const double[:] y,
+                       const np.intp_t[:] idx, int min_leaf):
     """
     Find the threshold for continuous attribute values that minimizes MSE.
 
@@ -232,7 +236,8 @@ def find_threshold_MSE(double[:] x, double[:] y, np.intp_t[:] idx, int min_leaf)
     return (best_inter - (sum * sum) / N) / N, x[idx[best_idx]]
 
 
-def find_binarization_MSE(double[:] x, double[:] y, int n_values, int min_leaf):
+def find_binarization_MSE(const double[:] x,
+                          const double[:] y, int n_values, int min_leaf):
     """
     Find the split of discrete values into two groups that minimizes the MSE.
 
@@ -315,7 +320,9 @@ def find_binarization_MSE(double[:] x, double[:] y, int n_values, int min_leaf):
     return (best_inter - start_inter) / x.shape[0], best_mapping
 
 
-def compute_grouped_MSE(double[:] x, double[:] y, int n_values, int min_leaf):
+def compute_grouped_MSE(const double[:] x,
+                        const double[:] y,
+                        int n_values, int min_leaf):
     """
     Compute the MSE decrease of the given split into groups.
 
@@ -371,8 +378,10 @@ def compute_grouped_MSE(double[:] x, double[:] y, int n_values, int min_leaf):
     return (inter - sum * sum / n) / x.shape[0]
 
 
-def compute_predictions(double[:, :] X, int[:] code,
-                        double[:, :] values, double[:] thresholds):
+def compute_predictions(const double[:, :] X,
+                        const int[:] code,
+                        const double[:, :] values,
+                        const double[:] thresholds):
     """
     Return the values (distributions, means and variances) stored in the nodes
     to which the tree classify the rows in X.
@@ -419,8 +428,10 @@ def compute_predictions(double[:, :] X, int[:] code,
     return np.asarray(predictions)
 
 
-def compute_predictions_csr(X, int[:] code,
-                               double[:, :] values, double[:] thresholds):
+def compute_predictions_csr(X,
+                            const int[:] code,
+                            const double[:, :] values,
+                            const double[:] thresholds):
     """
     Same as compute_predictions except for sparse data
     """
@@ -431,9 +442,9 @@ def compute_predictions_csr(X, int[:] code,
         double[: ,:] predictions = np.empty(
             (X.shape[0], values.shape[1]), dtype=np.float64)
 
-        double[:] data = X.data
-        np.int32_t[:] indptr = X.indptr
-        np.int32_t[:] indices = X.indices
+        const double[:] data = X.data
+        const np.int32_t[:] indptr = X.indptr
+        const np.int32_t[:] indices = X.indices
         int ind, attr, n_rows
 
     n_rows = X.shape[0]
@@ -463,8 +474,10 @@ def compute_predictions_csr(X, int[:] code,
                 predictions[i, j] = values[node_idx, j]
     return np.asarray(predictions)
 
-def compute_predictions_csc(X, int[:] code,
-                               double[:, :] values, double[:] thresholds):
+def compute_predictions_csc(X,
+                            const int[:] code,
+                            const double[:, :] values,
+                            const double[:] thresholds):
     """
     Same as compute_predictions except for sparse data
     """
@@ -475,9 +488,9 @@ def compute_predictions_csc(X, int[:] code,
         double[: ,:] predictions = np.empty(
             (X.shape[0], values.shape[1]), dtype=np.float64)
 
-        double[:] data = X.data
-        np.int32_t[:] indptr = X.indptr
-        np.int32_t[:] indices = X.indices
+        const double[:] data = X.data
+        const np.int32_t[:] indptr = X.indptr
+        const np.int32_t[:] indices = X.indices
         int ind, attr, n_rows
 
     n_rows = X.shape[0]

diff --git a/Orange/classification/tree.py b/Orange/classification/tree.py
@@ -112,7 +112,7 @@ def _score_disc():
             cont_entr = np.sum(cont * np.log(cont))
             score = (class_entr - attr_entr + cont_entr) / n / np.log(2)
             score *= n / len(data)  # punishment for missing values
-            branches = col_x
+            branches = col_x.copy()
             branches[np.isnan(branches)] = -1
             if score == 0:
                 return REJECT_ATTRIBUTE

diff --git a/Orange/data/instance.py b/Orange/data/instance.py
@@ -34,11 +34,12 @@ def __init__(self, domain, data=None, id=None):
             self._weight = 1
         elif isinstance(data, Instance) and data.domain == domain:
             self._x = np.array(data._x)
-            self._y = np.array(data._y)
+            self._y = np.atleast_1d(np.array(data._y))
             self._metas = np.array(data._metas)
             self._weight = data._weight
         else:
             self._x, self._y, self._metas = domain.convert(data)
+            self._y = np.atleast_1d(self._y)
             self._weight = 1
 
         if id is not None:
@@ -116,7 +117,10 @@ def __getitem__(self, key):
         if 0 <= idx < len(self._domain.attributes):
             value = self._x[idx]
         elif idx >= len(self._domain.attributes):
-            value = self._y[idx - len(self.domain.attributes)]
+            if self._y.ndim == 0:
+                value = self._y
+            else:
+                value = self._y[idx - len(self.domain.attributes)]
         else:
             value = self._metas[-1 - idx]
         var = self._domain[idx]

diff --git a/Orange/data/pandas_compat.py b/Orange/data/pandas_compat.py
@@ -274,6 +274,10 @@ def vars_from_df(df, role=None, force_nominal=False):
                       for var, col, expr in zip(Avars, Acols, Aexpr)]).T
         XYM.append(A)
 
+    # Let the tables share memory with pandas frame
+    if XYM[1] is not None and XYM[1].ndim == 2 and XYM[1].shape[1] == 1:
+        XYM[1] = XYM[1][:, 0]
+
     return XYM, Domain(attrs, class_vars, metas)
 
 

diff --git a/Orange/data/sql/table.py b/Orange/data/sql/table.py
@@ -556,7 +556,8 @@ def _filter_values(self, f):
         return t2
 
     @classmethod
-    def from_table(cls, domain, source, row_indices=...):
+    def from_table(cls, domain, source, row_indices=..., *, copy=False):
+        # pylint: disable=unused-argument
         assert row_indices is ...
 
         table = source.copy()