From e39db1defb54a89d1cd5a3bff2eab7a7d4528696 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Sun, 29 Jan 2017 09:21:51 +0100 Subject: [PATCH 1/5] Fitter: Fix infinite recursion in __getattr__ This occurred when a large dataset was used in the test and score widget using cross validation. Changing the exception from AttributeError to TypeError fixed this problem. --- Orange/modelling/base.py | 4 +--- Orange/tests/test_fitter.py | 10 +++++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Orange/modelling/base.py b/Orange/modelling/base.py index 56e545ac460..1a1199d1b0d 100644 --- a/Orange/modelling/base.py +++ b/Orange/modelling/base.py @@ -54,9 +54,7 @@ def get_learner(self, problem_type): """Get the learner for a given problem type.""" # Prevent trying to access the learner when problem type is None if problem_type not in self.__fits__: - # We're mostly called from __getattr__ via getattr, so we should - # raise AttributeError instead of TypeError - raise AttributeError("No learner to handle '{}'".format(problem_type)) + raise TypeError("No learner to handle '{}'".format(problem_type)) if self.__learners[problem_type] is None: learner = self.__fits__[problem_type](**self.__kwargs(problem_type)) learner.use_default_preprocessors = self.use_default_preprocessors diff --git a/Orange/tests/test_fitter.py b/Orange/tests/test_fitter.py index e428dcbfa3c..f1d5d4c165d 100644 --- a/Orange/tests/test_fitter.py +++ b/Orange/tests/test_fitter.py @@ -1,8 +1,9 @@ import unittest -from unittest.mock import Mock +from unittest.mock import Mock, patch from Orange.classification.base_classification import LearnerClassification from Orange.data import Table +from Orange.evaluation import CrossValidation from Orange.modelling import Fitter from Orange.preprocess import Randomize from Orange.regression.base_regression import LearnerRegression @@ -92,14 +93,13 @@ class DummyFitter(Fitter): fitter = DummyFitter() self.assertEqual(fitter.get_learner(Fitter.CLASSIFICATION).param, 1) self.assertEqual(fitter.get_learner(Fitter.REGRESSION).param, 2) - self.assertEqual(fitter.name, 'dummy') # Pass specific params try: fitter = DummyFitter(classification_param=10, regression_param=20) self.assertEqual(fitter.get_learner(Fitter.CLASSIFICATION).param, 10) self.assertEqual(fitter.get_learner(Fitter.REGRESSION).param, 20) - except AttributeError: + except TypeError: self.fail('Fitter did not properly distribute params to learners') def test_error_for_data_type_with_no_learner(self): @@ -126,3 +126,7 @@ def test_correctly_sets_preprocessors_on_learner(self): self.assertEqual( tuple(learner.active_preprocessors), (pp,), 'Fitter did not properly pass its preprocessors to its learners') + + def test_n_jobs_fitting(self): + with patch('Orange.evaluation.testing.CrossValidation._MIN_NJOBS_X_SIZE', 1): + CrossValidation(self.heart_disease, [DummyFitter()], k=5, n_jobs=5) From 8ee65c0f5905cf6c9bd7297d5dd256a274a8d303 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Thu, 2 Feb 2017 16:59:57 +0100 Subject: [PATCH 2/5] Fitter: Remove state from fitter --- Orange/base.py | 14 ++++++++------ Orange/modelling/base.py | 29 ++++++++++++++--------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/Orange/base.py b/Orange/base.py index 2913e775b8b..1f4768c7b12 100644 --- a/Orange/base.py +++ b/Orange/base.py @@ -114,12 +114,7 @@ def __call__(self, data): self.__class__.__name__) self.domain = data.domain - - if type(self).fit is Learner.fit: - model = self.fit_storage(data) - else: - X, Y, W = data.X, data.Y, data.W if data.has_weights() else None - model = self.fit(X, Y, W) + model = self._fit_model(data) model.domain = data.domain model.supports_multiclass = self.supports_multiclass model.name = self.name @@ -127,6 +122,13 @@ def __call__(self, data): model.original_data = origdata return model + def _fit_model(self, data): + if type(self).fit is Learner.fit: + return self.fit_storage(data) + else: + X, Y, W = data.X, data.Y, data.W if data.has_weights() else None + return self.fit(X, Y, W) + def preprocess(self, data): """Apply the `preprocessors` to the data""" for pp in self.active_preprocessors: diff --git a/Orange/modelling/base.py b/Orange/modelling/base.py index 1a1199d1b0d..76cdde482aa 100644 --- a/Orange/modelling/base.py +++ b/Orange/modelling/base.py @@ -38,17 +38,22 @@ class Fitter(Learner, metaclass=FitterMeta): def __init__(self, preprocessors=None, **kwargs): super().__init__(preprocessors=preprocessors) - self.kwargs = kwargs + self.params = kwargs # Make sure to pass preprocessor params to individual learners - self.kwargs['preprocessors'] = preprocessors - self.problem_type = None + self.params['preprocessors'] = preprocessors self.__learners = {self.CLASSIFICATION: None, self.REGRESSION: None} - def __call__(self, data): - # Set the appropriate problem type from the data - self.problem_type = self.CLASSIFICATION if \ - data.domain.has_discrete_class else self.REGRESSION - return self.get_learner(self.problem_type)(data) + def _fit_model(self, data): + if data.domain.has_discrete_class: + learner = self.get_learner(self.CLASSIFICATION) + else: + learner = self.get_learner(self.REGRESSION) + + if type(self).fit is Learner.fit: + return learner.fit_storage(data) + else: + X, Y, W = data.X, data.Y, data.W if data.has_weights() else None + return learner.fit(X, Y, W) def get_learner(self, problem_type): """Get the learner for a given problem type.""" @@ -64,7 +69,7 @@ def get_learner(self, problem_type): def __kwargs(self, problem_type): learner_kwargs = set( self.__fits__[problem_type].__init__.__code__.co_varnames[1:]) - changed_kwargs = self._change_kwargs(self.kwargs, self.problem_type) + changed_kwargs = self._change_kwargs(self.params, problem_type) return {k: v for k, v in changed_kwargs.items() if k in learner_kwargs} def _change_kwargs(self, kwargs, problem_type): @@ -90,9 +95,3 @@ def supports_weights(self): and self.get_learner(self.CLASSIFICATION).supports_weights) and ( hasattr(self.get_learner(self.REGRESSION), 'supports_weights') and self.get_learner(self.REGRESSION).supports_weights) - - def __getattr__(self, item): - # Make parameters accessible on the learner for simpler testing - if item in self.kwargs: - return self.kwargs[item] - return getattr(self.get_learner(self.problem_type), item) From 928f3b518975f1d6ea4cb60e414f129c42a15eee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Thu, 2 Feb 2017 17:56:17 +0100 Subject: [PATCH 3/5] Learner: The attribute is now expected everywhere --- Orange/base.py | 2 + Orange/classification/tree.py | 2 + Orange/modelling/base.py | 25 +++++++++-- Orange/regression/tree.py | 2 + .../tests/test_owadaboostregression.py | 3 +- .../regression/tests/test_owsgdregression.py | 6 ++- Orange/widgets/tests/base.py | 41 ++++++++++--------- 7 files changed, 54 insertions(+), 27 deletions(-) diff --git a/Orange/base.py b/Orange/base.py index 1f4768c7b12..59ccfd089fd 100644 --- a/Orange/base.py +++ b/Orange/base.py @@ -72,6 +72,8 @@ class Learner(_ReprableWithPreprocessors): This property is needed mainly because of the `Fitter` class, which can not know in advance, which preprocessors it will need to use. Therefore this resolves the active preprocessors using a lazy approach. + params : dict + The params that the learner is constructed with. """ supports_multiclass = False diff --git a/Orange/classification/tree.py b/Orange/classification/tree.py index dea3783a535..3da76f5d217 100644 --- a/Orange/classification/tree.py +++ b/Orange/classification/tree.py @@ -64,6 +64,8 @@ def __init__( self.min_samples_split = min_samples_split self.sufficient_majority = sufficient_majority self.max_depth = max_depth + self.params = {k: v for k, v in vars().items() + if k not in ('args', 'kwargs')} def _select_attr(self, data): """Select the attribute for the next split. diff --git a/Orange/modelling/base.py b/Orange/modelling/base.py index 76cdde482aa..a87ded973bb 100644 --- a/Orange/modelling/base.py +++ b/Orange/modelling/base.py @@ -38,9 +38,9 @@ class Fitter(Learner, metaclass=FitterMeta): def __init__(self, preprocessors=None, **kwargs): super().__init__(preprocessors=preprocessors) - self.params = kwargs + self.kwargs = kwargs # Make sure to pass preprocessor params to individual learners - self.params['preprocessors'] = preprocessors + self.kwargs['preprocessors'] = preprocessors self.__learners = {self.CLASSIFICATION: None, self.REGRESSION: None} def _fit_model(self, data): @@ -56,7 +56,14 @@ def _fit_model(self, data): return learner.fit(X, Y, W) def get_learner(self, problem_type): - """Get the learner for a given problem type.""" + """Get the learner for a given problem type. + + Returns + ------- + Learner + The appropriate learner for the given problem type. + + """ # Prevent trying to access the learner when problem type is None if problem_type not in self.__fits__: raise TypeError("No learner to handle '{}'".format(problem_type)) @@ -69,7 +76,7 @@ def get_learner(self, problem_type): def __kwargs(self, problem_type): learner_kwargs = set( self.__fits__[problem_type].__init__.__code__.co_varnames[1:]) - changed_kwargs = self._change_kwargs(self.params, problem_type) + changed_kwargs = self._change_kwargs(self.kwargs, problem_type) return {k: v for k, v in changed_kwargs.items() if k in learner_kwargs} def _change_kwargs(self, kwargs, problem_type): @@ -95,3 +102,13 @@ def supports_weights(self): and self.get_learner(self.CLASSIFICATION).supports_weights) and ( hasattr(self.get_learner(self.REGRESSION), 'supports_weights') and self.get_learner(self.REGRESSION).supports_weights) + + @property + def params(self): + raise TypeError( + 'A fitter does not have its own params. If you need to access ' + 'learner params, please use the `get_params` method.') + + def get_params(self, problem_type): + """Access the specific learner params of a given learner.""" + return self.get_learner(problem_type).params diff --git a/Orange/regression/tree.py b/Orange/regression/tree.py index 75267ab648f..c6d7fdf67fc 100644 --- a/Orange/regression/tree.py +++ b/Orange/regression/tree.py @@ -52,6 +52,8 @@ def __init__( self.min_samples_leaf = min_samples_leaf self.min_samples_split = min_samples_split self.max_depth = max_depth + self.params = {k: v for k, v in vars().items() + if k not in ('args', 'kwargs')} def _select_attr(self, data): """Select the attribute for the next split. diff --git a/Orange/widgets/regression/tests/test_owadaboostregression.py b/Orange/widgets/regression/tests/test_owadaboostregression.py index c1340bc2bb8..a77ba94004c 100644 --- a/Orange/widgets/regression/tests/test_owadaboostregression.py +++ b/Orange/widgets/regression/tests/test_owadaboostregression.py @@ -19,7 +19,8 @@ def setUp(self): self.valid_datasets = (self.data,) losses = [loss.lower() for loss in self.widget.losses] self.parameters = [ - ParameterMapping('loss', self.widget.reg_algorithm_combo, losses), + ParameterMapping('loss', self.widget.reg_algorithm_combo, losses, + problem_type='regression'), ParameterMapping('learning_rate', self.widget.learning_rate_spin), ParameterMapping('n_estimators', self.widget.n_estimators_spin)] diff --git a/Orange/widgets/regression/tests/test_owsgdregression.py b/Orange/widgets/regression/tests/test_owsgdregression.py index 90aece5d6e4..7f74bbd7a36 100644 --- a/Orange/widgets/regression/tests/test_owsgdregression.py +++ b/Orange/widgets/regression/tests/test_owsgdregression.py @@ -15,8 +15,10 @@ def setUp(self): self.valid_datasets = (self.housing,) self.parameters = [ ParameterMapping('loss', self.widget.reg_loss_function_combo, - list(zip(*self.widget.reg_losses))[1]), - ParameterMapping.from_attribute(self.widget, 'reg_epsilon', 'epsilon'), + list(zip(*self.widget.reg_losses))[1], + problem_type='regression'), + ParameterMapping('epsilon', self.widget.reg_epsilon_spin, + problem_type='regression'), ParameterMapping('penalty', self.widget.penalty_combo, list(zip(*self.widget.penalties))[1]), ParameterMapping.from_attribute(self.widget, 'alpha'), diff --git a/Orange/widgets/tests/base.py b/Orange/widgets/tests/base.py index 0d0eb7f046f..420f419a6f7 100644 --- a/Orange/widgets/tests/base.py +++ b/Orange/widgets/tests/base.py @@ -465,30 +465,33 @@ def test_output_model_name(self): self.widget.apply_button.button.click() self.assertEqual(self.get_output(self.model_name).name, new_name) + def _get_param_value(self, learner, param): + if isinstance(learner, Fitter): + # Both is just a was to indicate to the tests, fitters don't + # actually support this + if param.problem_type == "both": + problem_type = learner.CLASSIFICATION + else: + problem_type = param.problem_type + return learner.get_params(problem_type).get(param.name) + else: + return learner.params.get(param.name) + def test_parameters_default(self): """Check if learner's parameters are set to default (widget's) values """ for dataset in self.valid_datasets: self.send_signal("Data", dataset) self.widget.apply_button.button.click() - if hasattr(self.widget.learner, "params"): - learner_params = self.widget.learner.params - for parameter in self.parameters: - # Skip if the param isn't used for the given data type - if self._should_check_parameter(parameter, dataset): - self.assertEqual(learner_params.get(parameter.name), - parameter.get_value()) + for parameter in self.parameters: + # Skip if the param isn't used for the given data type + if self._should_check_parameter(parameter, dataset): + self.assertEqual( + self._get_param_value(self.widget.learner, parameter), + parameter.get_value()) def test_parameters(self): """Check learner and model for various values of all parameters""" - - def get_value(learner, name): - # Handle SKL and skl-like learners, and non-SKL learners - if hasattr(learner, "params"): - return learner.params.get(name) - else: - return getattr(learner, name) - # Test params on every valid dataset, since some attributes may apply # to only certain problem types for dataset in self.valid_datasets: @@ -504,15 +507,14 @@ def get_value(learner, name): for value in parameter.values: parameter.set_value(value) self.widget.apply_button.button.click() - param = get_value(self.widget.learner, parameter.name) + param = self._get_param_value(self.widget.learner, parameter) self.assertEqual( param, parameter.get_value(), "Mismatching setting for parameter '%s'" % parameter) self.assertEqual( param, value, "Mismatching setting for parameter '%s'" % parameter) - param = get_value(self.get_output("Learner"), - parameter.name) + param = self._get_param_value(self.get_output("Learner"), parameter) self.assertEqual( param, value, "Mismatching setting for parameter '%s'" % parameter) @@ -520,8 +522,7 @@ def get_value(learner, name): if issubclass(self.widget.LEARNER, SklModel): model = self.get_output(self.model_name) if model is not None: - self.assertEqual(get_value(model, parameter.name), - value) + self.assertEqual(self._get_param_value(model, parameter), value) self.assertFalse(self.widget.Error.active) else: self.assertTrue(self.widget.Error.active) From bab229f8e62eb27bea7647a7487cf8ff941d70db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Fri, 17 Feb 2017 12:04:26 +0100 Subject: [PATCH 4/5] Tree: Explicitly copy params --- Orange/classification/tree.py | 13 ++++++------- Orange/regression/tree.py | 11 +++++------ 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/Orange/classification/tree.py b/Orange/classification/tree.py index 3da76f5d217..2be0fbf347a 100644 --- a/Orange/classification/tree.py +++ b/Orange/classification/tree.py @@ -59,13 +59,12 @@ def __init__( min_samples_leaf=1, min_samples_split=2, sufficient_majority=0.95, **kwargs): super().__init__(*args, **kwargs) - self.binarize = binarize - self.min_samples_leaf = min_samples_leaf - self.min_samples_split = min_samples_split - self.sufficient_majority = sufficient_majority - self.max_depth = max_depth - self.params = {k: v for k, v in vars().items() - if k not in ('args', 'kwargs')} + self.params = {} + self.binarize = self.params['binarize'] = binarize + self.min_samples_leaf = self.params['min_samples_leaf'] = min_samples_leaf + self.min_samples_split = self.params['min_samples_split'] = min_samples_split + self.sufficient_majority = self.params['sufficient_majority'] = sufficient_majority + self.max_depth = self.params['max_depth'] = max_depth def _select_attr(self, data): """Select the attribute for the next split. diff --git a/Orange/regression/tree.py b/Orange/regression/tree.py index c6d7fdf67fc..b5fd3207c04 100644 --- a/Orange/regression/tree.py +++ b/Orange/regression/tree.py @@ -48,12 +48,11 @@ def __init__( binarize=False, min_samples_leaf=1, min_samples_split=2, max_depth=None, **kwargs): super().__init__(*args, **kwargs) - self.binarize = binarize - self.min_samples_leaf = min_samples_leaf - self.min_samples_split = min_samples_split - self.max_depth = max_depth - self.params = {k: v for k, v in vars().items() - if k not in ('args', 'kwargs')} + self.params = {} + self.binarize = self.params['binarity'] = binarize + self.min_samples_leaf = self.params['min_samples_leaf'] = min_samples_leaf + self.min_samples_split = self.params['min_samples_split'] = min_samples_split + self.max_depth = self.params['max_depth'] = max_depth def _select_attr(self, data): """Select the attribute for the next split. From 49188eeff0a40e748d6e0044686a7a9b568626a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavlin=20Poli=C4=8Dar?= Date: Fri, 17 Feb 2017 12:46:18 +0100 Subject: [PATCH 5/5] OWSVM: Fix failing tests due to problem_specific params It is necessary to specify which problem type any param on the learner is valid for, and I had forgotten to do this for the SVM learner. This fixes that. --- .../widgets/regression/tests/test_owsvmregression.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Orange/widgets/regression/tests/test_owsvmregression.py b/Orange/widgets/regression/tests/test_owsvmregression.py index 5977da4fe57..65aa35b770c 100644 --- a/Orange/widgets/regression/tests/test_owsvmregression.py +++ b/Orange/widgets/regression/tests/test_owsvmregression.py @@ -28,8 +28,10 @@ def setter(value): gamma_spin.setValue(value) self.parameters = [ - ParameterMapping("C", self.widget.C_spin), - ParameterMapping("epsilon", self.widget.epsilon_spin), + ParameterMapping("C", self.widget.C_spin, + problem_type="regression"), + ParameterMapping("epsilon", self.widget.epsilon_spin, + problem_type="regression"), ParameterMapping("gamma", self.widget._kernel_params[0], values=values, setter=setter, getter=getter), ParameterMapping("coef0", self.widget._kernel_params[1]), @@ -44,8 +46,10 @@ def test_parameters_svr_type(self): # setChecked(True) does not trigger callback event self.widget.nu_radio.click() self.assertEqual(self.widget.svm_type, OWSVM.Nu_SVM) - self.parameters[0] = ParameterMapping("C", self.widget.nu_C_spin) - self.parameters[1] = ParameterMapping("nu", self.widget.nu_spin) + self.parameters[0] = ParameterMapping("C", self.widget.nu_C_spin, + problem_type="regression") + self.parameters[1] = ParameterMapping("nu", self.widget.nu_spin, + problem_type="regression") self.test_parameters() def test_kernel_equation(self):