[FIX] Fitter: Fix infinite recursion in __getattr__ #1977

Merged · 5 commits · Feb 23, 2017
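Background on the bug being fixed: Fitter previously implemented __getattr__ to forward unknown attributes to its underlying learners (see the block removed from Orange/modelling/base.py below). Because __getattr__ fires for every missing attribute, a hook that itself reads instance attributes re-enters itself whenever those attributes are also missing, e.g. when __init__ is bypassed during unpickling. A minimal sketch of that failure mode (BrokenFitter is a hypothetical stand-in, not Orange code):

class BrokenFitter:
    def __init__(self, **kwargs):
        self.kwargs = kwargs

    def __getattr__(self, item):
        # Reading self.kwargs re-enters __getattr__('kwargs') whenever
        # kwargs has not been set on this instance yet.
        if item in self.kwargs:
            return self.kwargs[item]
        raise AttributeError(item)

broken = BrokenFitter.__new__(BrokenFitter)  # bypasses __init__, as pickle does
broken.alpha  # __getattr__('alpha') -> self.kwargs -> __getattr__('kwargs') -> ... RecursionError

The PR removes the hook entirely and replaces it with the explicit params/get_params API shown in Orange/modelling/base.py below.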
16 changes: 10 additions & 6 deletions Orange/base.py
@@ -72,6 +72,8 @@ class Learner(_ReprableWithPreprocessors):
This property is needed mainly because of the `Fitter` class, which
cannot know in advance which preprocessors it will need to use. It
therefore resolves the active preprocessors lazily.
params : dict
The params that the learner is constructed with.

"""
supports_multiclass = False
@@ -114,19 +116,21 @@ def __call__(self, data):
self.__class__.__name__)

self.domain = data.domain

if type(self).fit is Learner.fit:
model = self.fit_storage(data)
else:
X, Y, W = data.X, data.Y, data.W if data.has_weights() else None
model = self.fit(X, Y, W)
model = self._fit_model(data)
model.domain = data.domain
model.supports_multiclass = self.supports_multiclass
model.name = self.name
model.original_domain = origdomain
model.original_data = origdata
return model

def _fit_model(self, data):
if type(self).fit is Learner.fit:
return self.fit_storage(data)
else:
X, Y, W = data.X, data.Y, data.W if data.has_weights() else None
return self.fit(X, Y, W)

def preprocess(self, data):
"""Apply the `preprocessors` to the data"""
for pp in self.active_preprocessors:
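The extracted _fit_model helper keeps the dispatch in one place: the identity check type(self).fit is Learner.fit detects whether a subclass overrode fit. A runnable sketch of the same pattern (DataLearner, ArrayLearner and FakeTable are hypothetical stand-ins, not Orange classes):

class Learner:
    def fit(self, X, Y, W=None):
        raise NotImplementedError

    def fit_storage(self, data):
        raise NotImplementedError

    def _fit_model(self, data):
        if type(self).fit is Learner.fit:
            # The subclass left fit() untouched, so it must consume tables.
            return self.fit_storage(data)
        X, Y, W = data.X, data.Y, data.W if data.has_weights() else None
        return self.fit(X, Y, W)

class DataLearner(Learner):
    def fit_storage(self, data):  # inherits Learner.fit unchanged
        return 'fitted from storage'

class ArrayLearner(Learner):
    def fit(self, X, Y, W=None):  # overrides fit
        return 'fitted from arrays'

class FakeTable:
    X, Y, W = [[1.0]], [0], None
    def has_weights(self):
        return False

assert DataLearner()._fit_model(FakeTable()) == 'fitted from storage'
assert ArrayLearner()._fit_model(FakeTable()) == 'fitted from arrays'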
11 changes: 6 additions & 5 deletions Orange/classification/tree.py
@@ -59,11 +59,12 @@ def __init__(
min_samples_leaf=1, min_samples_split=2, sufficient_majority=0.95,
**kwargs):
super().__init__(*args, **kwargs)
self.binarize = binarize
self.min_samples_leaf = min_samples_leaf
self.min_samples_split = min_samples_split
self.sufficient_majority = sufficient_majority
self.max_depth = max_depth
self.params = {}
self.binarize = self.params['binarize'] = binarize
self.min_samples_leaf = self.params['min_samples_leaf'] = min_samples_leaf
self.min_samples_split = self.params['min_samples_split'] = min_samples_split
self.sufficient_majority = self.params['sufficient_majority'] = sufficient_majority
self.max_depth = self.params['max_depth'] = max_depth

def _select_attr(self, data):
"""Select the attribute for the next split.
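The chained assignments above rely on standard Python semantics: the right-hand side is evaluated once and then bound to each target from left to right, so the instance attribute and the params entry always hold the same value. For example:

class Example:
    def __init__(self, binarize=False):
        self.params = {}
        self.binarize = self.params['binarize'] = binarize

e = Example(binarize=True)
assert e.binarize is e.params['binarize'] is True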
46 changes: 30 additions & 16 deletions Orange/modelling/base.py
@@ -41,22 +41,32 @@ def __init__(self, preprocessors=None, **kwargs):
self.kwargs = kwargs
# Make sure to pass preprocessor params to individual learners
self.kwargs['preprocessors'] = preprocessors
self.problem_type = None
self.__learners = {self.CLASSIFICATION: None, self.REGRESSION: None}

def __call__(self, data):
# Set the appropriate problem type from the data
self.problem_type = self.CLASSIFICATION if \
data.domain.has_discrete_class else self.REGRESSION
return self.get_learner(self.problem_type)(data)
def _fit_model(self, data):
if data.domain.has_discrete_class:
learner = self.get_learner(self.CLASSIFICATION)
else:
learner = self.get_learner(self.REGRESSION)

if type(self).fit is Learner.fit:
return learner.fit_storage(data)
else:
X, Y, W = data.X, data.Y, data.W if data.has_weights() else None
return learner.fit(X, Y, W)

def get_learner(self, problem_type):
"""Get the learner for a given problem type."""
"""Get the learner for a given problem type.

Returns
-------
Learner
The appropriate learner for the given problem type.

"""
# Prevent trying to access the learner when problem type is None
if problem_type not in self.__fits__:
# We're mostly called from __getattr__ via getattr, so we should
# raise AttributeError instead of TypeError
raise AttributeError("No learner to handle '{}'".format(problem_type))
raise TypeError("No learner to handle '{}'".format(problem_type))
if self.__learners[problem_type] is None:
learner = self.__fits__[problem_type](**self.__kwargs(problem_type))
learner.use_default_preprocessors = self.use_default_preprocessors
@@ -66,7 +76,7 @@ def get_learner(self, problem_type):
def __kwargs(self, problem_type):
learner_kwargs = set(
self.__fits__[problem_type].__init__.__code__.co_varnames[1:])
changed_kwargs = self._change_kwargs(self.kwargs, self.problem_type)
changed_kwargs = self._change_kwargs(self.kwargs, problem_type)
return {k: v for k, v in changed_kwargs.items() if k in learner_kwargs}

def _change_kwargs(self, kwargs, problem_type):
@@ -93,8 +103,12 @@ def supports_weights(self):
hasattr(self.get_learner(self.REGRESSION), 'supports_weights')
and self.get_learner(self.REGRESSION).supports_weights)

def __getattr__(self, item):
# Make parameters accessible on the learner for simpler testing
if item in self.kwargs:
return self.kwargs[item]
return getattr(self.get_learner(self.problem_type), item)
@property
def params(self):
raise TypeError(
'A fitter does not have its own params. If you need to access '
'learner params, please use the `get_params` method.')

def get_params(self, problem_type):
"""Access the specific learner params of a given learner."""
return self.get_learner(problem_type).params
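Taken together, the new API makes the caller say which learner's params are meant. A usage sketch, assuming a DummyFitter subclass wired up like the one in Orange/tests/test_fitter.py:

fitter = DummyFitter(classification_param=10, regression_param=20)

fitter.get_params(Fitter.CLASSIFICATION)  # params dict of the classification learner
fitter.get_params(Fitter.REGRESSION)      # params dict of the regression learner

try:
    fitter.params  # fitters no longer have params of their own...
except TypeError:
    pass           # ...and now fail loudly instead of recursing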
9 changes: 5 additions & 4 deletions Orange/regression/tree.py
@@ -48,10 +48,11 @@ def __init__(
binarize=False, min_samples_leaf=1, min_samples_split=2,
max_depth=None, **kwargs):
super().__init__(*args, **kwargs)
self.binarize = binarize
self.min_samples_leaf = min_samples_leaf
self.min_samples_split = min_samples_split
self.max_depth = max_depth
self.params = {}
self.binarize = self.params['binarize'] = binarize
self.min_samples_leaf = self.params['min_samples_leaf'] = min_samples_leaf
self.min_samples_split = self.params['min_samples_split'] = min_samples_split
self.max_depth = self.params['max_depth'] = max_depth

def _select_attr(self, data):
"""Select the attribute for the next split.
10 changes: 7 additions & 3 deletions Orange/tests/test_fitter.py
@@ -1,8 +1,9 @@
import unittest
from unittest.mock import Mock
from unittest.mock import Mock, patch

from Orange.classification.base_classification import LearnerClassification
from Orange.data import Table
from Orange.evaluation import CrossValidation
from Orange.modelling import Fitter
from Orange.preprocess import Randomize
from Orange.regression.base_regression import LearnerRegression
@@ -92,14 +93,13 @@ class DummyFitter(Fitter):
fitter = DummyFitter()
self.assertEqual(fitter.get_learner(Fitter.CLASSIFICATION).param, 1)
self.assertEqual(fitter.get_learner(Fitter.REGRESSION).param, 2)
self.assertEqual(fitter.name, 'dummy')

# Pass specific params
try:
fitter = DummyFitter(classification_param=10, regression_param=20)
self.assertEqual(fitter.get_learner(Fitter.CLASSIFICATION).param, 10)
self.assertEqual(fitter.get_learner(Fitter.REGRESSION).param, 20)
except AttributeError:
except TypeError:
self.fail('Fitter did not properly distribute params to learners')

def test_error_for_data_type_with_no_learner(self):
@@ -126,3 +126,7 @@ def test_correctly_sets_preprocessors_on_learner(self):
self.assertEqual(
tuple(learner.active_preprocessors), (pp,),
'Fitter did not properly pass its preprocessors to its learners')

def test_n_jobs_fitting(self):
with patch('Orange.evaluation.testing.CrossValidation._MIN_NJOBS_X_SIZE', 1):
CrossValidation(self.heart_disease, [DummyFitter()], k=5, n_jobs=5)
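The new test_n_jobs_fitting lowers CrossValidation's minimum-data-size threshold so the parallel (n_jobs=5) path runs even on a small table. In isolation, patch used as a context manager simply swaps an attribute for the duration of the with block and restores it on exit; a self-contained sketch (Config and THRESHOLD are hypothetical names):

from unittest.mock import patch

class Config:
    THRESHOLD = 10000

with patch.object(Config, 'THRESHOLD', 1):
    assert Config.THRESHOLD == 1    # small data now clears the threshold
assert Config.THRESHOLD == 10000    # original value restored on exit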
3 changes: 2 additions & 1 deletion Orange/widgets/regression/tests/test_owadaboostregression.py
@@ -19,7 +19,8 @@ def setUp(self):
self.valid_datasets = (self.data,)
losses = [loss.lower() for loss in self.widget.losses]
self.parameters = [
ParameterMapping('loss', self.widget.reg_algorithm_combo, losses),
ParameterMapping('loss', self.widget.reg_algorithm_combo, losses,
problem_type='regression'),
ParameterMapping('learning_rate', self.widget.learning_rate_spin),
ParameterMapping('n_estimators', self.widget.n_estimators_spin)]

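Regression-only parameters in these widget tests are now tagged with problem_type='regression' so the base test knows which of a fitter's two learners to query (see _get_param_value in Orange/widgets/tests/base.py below). A reduced sketch of what that argument plausibly looks like on ParameterMapping (hypothetical simplified signature; the real class takes more arguments):

class ParameterMapping:
    def __init__(self, name, gui_element, values=None,
                 problem_type='both', **kwargs):
        self.name = name
        self.gui_element = gui_element
        self.values = values
        # 'both' means the parameter applies to either learner;
        # 'classification'/'regression' restricts it to one of them.
        self.problem_type = problem_type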
6 changes: 4 additions & 2 deletions Orange/widgets/regression/tests/test_owsgdregression.py
@@ -15,8 +15,10 @@ def setUp(self):
self.valid_datasets = (self.housing,)
self.parameters = [
ParameterMapping('loss', self.widget.reg_loss_function_combo,
list(zip(*self.widget.reg_losses))[1]),
ParameterMapping.from_attribute(self.widget, 'reg_epsilon', 'epsilon'),
list(zip(*self.widget.reg_losses))[1],
problem_type='regression'),
ParameterMapping('epsilon', self.widget.reg_epsilon_spin,
problem_type='regression'),
ParameterMapping('penalty', self.widget.penalty_combo,
list(zip(*self.widget.penalties))[1]),
ParameterMapping.from_attribute(self.widget, 'alpha'),
12 changes: 8 additions & 4 deletions Orange/widgets/regression/tests/test_owsvmregression.py
@@ -28,8 +28,10 @@ def setter(value):
gamma_spin.setValue(value)

self.parameters = [
ParameterMapping("C", self.widget.C_spin),
ParameterMapping("epsilon", self.widget.epsilon_spin),
ParameterMapping("C", self.widget.C_spin,
problem_type="regression"),
ParameterMapping("epsilon", self.widget.epsilon_spin,
problem_type="regression"),
ParameterMapping("gamma", self.widget._kernel_params[0],
values=values, setter=setter, getter=getter),
ParameterMapping("coef0", self.widget._kernel_params[1]),
@@ -44,8 +46,10 @@ def test_parameters_svr_type(self):
# setChecked(True) does not trigger callback event
self.widget.nu_radio.click()
self.assertEqual(self.widget.svm_type, OWSVM.Nu_SVM)
self.parameters[0] = ParameterMapping("C", self.widget.nu_C_spin)
self.parameters[1] = ParameterMapping("nu", self.widget.nu_spin)
self.parameters[0] = ParameterMapping("C", self.widget.nu_C_spin,
problem_type="regression")
self.parameters[1] = ParameterMapping("nu", self.widget.nu_spin,
problem_type="regression")
self.test_parameters()

def test_kernel_equation(self):
41 changes: 21 additions & 20 deletions Orange/widgets/tests/base.py
@@ -465,30 +465,33 @@ def test_output_model_name(self):
self.widget.apply_button.button.click()
self.assertEqual(self.get_output(self.model_name).name, new_name)

def _get_param_value(self, learner, param):
if isinstance(learner, Fitter):
# 'Both' is just a way to indicate this to the tests; fitters don't
# actually support it
if param.problem_type == "both":
problem_type = learner.CLASSIFICATION
else:
problem_type = param.problem_type
return learner.get_params(problem_type).get(param.name)
else:
return learner.params.get(param.name)

def test_parameters_default(self):
"""Check if learner's parameters are set to default (widget's) values
"""
for dataset in self.valid_datasets:
self.send_signal("Data", dataset)
self.widget.apply_button.button.click()
if hasattr(self.widget.learner, "params"):
learner_params = self.widget.learner.params
for parameter in self.parameters:
# Skip if the param isn't used for the given data type
if self._should_check_parameter(parameter, dataset):
self.assertEqual(learner_params.get(parameter.name),
parameter.get_value())
for parameter in self.parameters:
# Skip if the param isn't used for the given data type
if self._should_check_parameter(parameter, dataset):
self.assertEqual(
self._get_param_value(self.widget.learner, parameter),
parameter.get_value())

def test_parameters(self):
"""Check learner and model for various values of all parameters"""

def get_value(learner, name):
# Handle SKL and skl-like learners, and non-SKL learners
if hasattr(learner, "params"):
return learner.params.get(name)
else:
return getattr(learner, name)

# Test params on every valid dataset, since some attributes may apply
# to only certain problem types
for dataset in self.valid_datasets:
@@ -504,24 +507,22 @@ def get_value(learner, name):
for value in parameter.values:
parameter.set_value(value)
self.widget.apply_button.button.click()
param = get_value(self.widget.learner, parameter.name)
param = self._get_param_value(self.widget.learner, parameter)
self.assertEqual(
param, parameter.get_value(),
"Mismatching setting for parameter '%s'" % parameter)
self.assertEqual(
param, value,
"Mismatching setting for parameter '%s'" % parameter)
param = get_value(self.get_output("Learner"),
parameter.name)
param = self._get_param_value(self.get_output("Learner"), parameter)
self.assertEqual(
param, value,
"Mismatching setting for parameter '%s'" % parameter)

if issubclass(self.widget.LEARNER, SklModel):
model = self.get_output(self.model_name)
if model is not None:
self.assertEqual(get_value(model, parameter.name),
value)
self.assertEqual(self._get_param_value(model, parameter), value)
self.assertFalse(self.widget.Error.active)
else:
self.assertTrue(self.widget.Error.active)