Merge pull request #4767 from PrimozGodec/fix-softmax
[FIX] Fix and update Softmax regression learner
lanzagar authored May 28, 2020
2 parents efbe9ef + ec21f13 commit 5cd99c7
Showing 4 changed files with 48 additions and 57 deletions.
30 changes: 26 additions & 4 deletions Orange/base.py
@@ -206,15 +206,12 @@ class Model(Reprable):
ValueProbs = 2

def __init__(self, domain=None, original_domain=None):
if isinstance(self, Learner):
domain = None
elif domain is None:
raise ValueError("unspecified domain")
self.domain = domain
if original_domain is not None:
self.original_domain = original_domain
else:
self.original_domain = domain
self.used_vals = None

def predict(self, X):
if type(self).predict_storage is Model.predict_storage:
@@ -383,6 +380,30 @@ def one_hot_probs(value):
probs[:, i, :] = one_hot(value[:, i])
return probs

def extend_probabilities(probs):
"""
Since SklModels and models implementing `fit` and not `fit_storage`
do not guarantee correct prediction dimensionality, extend
dimensionality of probabilities when it does not match the number
of values in the domain.
"""
class_vars = self.domain.class_vars
max_values = max(len(cv.values) for cv in class_vars)
if max_values == probs.shape[-1]:
return probs

if not self.supports_multiclass:
probs = probs[:, np.newaxis, :]

probs_ext = np.zeros((len(probs), len(class_vars), max_values))
for c, used_vals in enumerate(self.used_vals):
for i, cv in enumerate(used_vals):
probs_ext[:, c, cv] = probs[:, c, i]

if not self.supports_multiclass:
probs_ext = probs_ext[:, 0, :]
return probs_ext

def fix_dim(x):
return x[0] if one_d else x

@@ -439,6 +460,7 @@ def fix_dim(x):
if probs is None and (ret != Model.Value or backmappers is not None):
probs = one_hot_probs(value)
if probs is not None:
probs = extend_probabilities(probs)
probs = self.backmap_probs(probs, n_values, backmappers)
if ret != Model.Probs:
if value is None:
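The effect of the new `extend_probabilities` helper can be illustrated with a minimal standalone sketch (plain NumPy with made-up data, not the Orange API): a single-target model trained on data in which only two of three class values occur returns two probability columns, and the helper scatters them back to their positions in the full domain using `used_vals`.

```python
import numpy as np

# Hypothetical inputs: the model saw only class values 0 and 2 at fit
# time, so it reports two probability columns per row.
used_vals = [np.array([0, 2])]            # values seen for the single target
probs = np.array([[0.9, 0.1],
                  [0.2, 0.8]])            # columns correspond to values 0 and 2

n_values = 3                              # values in the full domain
probs_ext = np.zeros((len(probs), len(used_vals), n_values))
for c, vals in enumerate(used_vals):
    for i, v in enumerate(vals):
        probs_ext[:, c, v] = probs[:, i]  # scatter each column to its domain slot
probs_ext = probs_ext[:, 0, :]            # single target: drop the class axis

print(probs_ext)
# [[0.9 0.  0.1]
#  [0.2 0.  0.8]]
```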
23 changes: 1 addition & 22 deletions Orange/classification/base_classification.py
@@ -1,5 +1,3 @@
import numpy as np

from Orange.base import Learner, Model, SklLearner, SklModel

__all__ = ["LearnerClassification", "ModelClassification",
@@ -18,26 +16,7 @@ class ModelClassification(Model):


class SklModelClassification(SklModel, ModelClassification):
def predict(self, X):
prediction = super().predict(X)
if not isinstance(prediction, tuple):
return prediction
values, probs = prediction

class_vars = self.domain.class_vars
max_values = max(len(cv.values) for cv in class_vars)
if max_values == probs.shape[-1]:
return values, probs

if not self.supports_multiclass:
probs = probs[:, np.newaxis, :]
probs_ext = np.zeros((len(probs), len(class_vars), max_values))
for c, used_vals in enumerate(self.used_vals):
for i, cv in enumerate(used_vals):
probs_ext[:, c, cv] = probs[:, c, i]
if not self.supports_multiclass:
probs_ext = probs_ext[:, 0, :]
return values, probs_ext
pass


class SklLearnerClassification(SklLearner, LearnerClassification):
48 changes: 21 additions & 27 deletions Orange/classification/softmax_regression.py
@@ -29,9 +29,10 @@ class SoftmaxRegressionLearner(Learner):
parameters to be smaller.
preprocessors : list, optional
Preprocessors are applied to data before training or testing. Default preprocessors:
Defaults to
`[RemoveNaNClasses(), RemoveNaNColumns(), Impute(), Continuize(), Normalize()]`
Preprocessors are applied to data before training or testing. Default
preprocessors:
`[RemoveNaNClasses(), RemoveNaNColumns(), Impute(), Continuize(),
Normalize()]`
- remove columns with all values as NaN
- replace NaN values with suitable values
@@ -52,53 +53,55 @@ def __init__(self, lambda_=1.0, preprocessors=None, **fmin_args):
super().__init__(preprocessors=preprocessors)
self.lambda_ = lambda_
self.fmin_args = fmin_args
self.num_classes = None

def cost_grad(self, Theta_flat, X, Y):
Theta = Theta_flat.reshape((self.num_classes, X.shape[1]))
def cost_grad(self, theta_flat, X, Y):
theta = theta_flat.reshape((self.num_classes, X.shape[1]))

M = X.dot(Theta.T)
M = X.dot(theta.T)
P = np.exp(M - np.max(M, axis=1)[:, None])
P /= np.sum(P, axis=1)[:, None]

cost = -np.sum(np.log(P) * Y)
cost += self.lambda_ * Theta_flat.dot(Theta_flat) / 2.0
cost += self.lambda_ * theta_flat.dot(theta_flat) / 2.0
cost /= X.shape[0]

grad = X.T.dot(P - Y).T
grad += self.lambda_ * Theta
grad += self.lambda_ * theta
grad /= X.shape[0]

return cost, grad.ravel()

def fit(self, X, y, W):
if len(y.shape) > 1:
def fit(self, X, Y, W=None):
if len(Y.shape) > 1:
raise ValueError('Softmax regression does not support '
'multi-label classification')

if np.isnan(np.sum(X)) or np.isnan(np.sum(y)):
if np.isnan(np.sum(X)) or np.isnan(np.sum(Y)):
raise ValueError('Softmax regression does not support '
'unknown values')

X = np.hstack((X, np.ones((X.shape[0], 1))))

self.num_classes = np.unique(y).size
Y = np.eye(self.num_classes)[y.ravel().astype(int)]
self.num_classes = np.unique(Y).size
Y = np.eye(self.num_classes)[Y.ravel().astype(int)]

theta = np.zeros(self.num_classes * X.shape[1])
theta, j, ret = fmin_l_bfgs_b(self.cost_grad, theta,
args=(X, Y), **self.fmin_args)
Theta = theta.reshape((self.num_classes, X.shape[1]))
theta = theta.reshape((self.num_classes, X.shape[1]))

return SoftmaxRegressionModel(Theta)
return SoftmaxRegressionModel(theta)


class SoftmaxRegressionModel(Model):
def __init__(self, Theta):
self.Theta = Theta
def __init__(self, theta):
super().__init__()
self.theta = theta

def predict(self, X):
X = np.hstack((X, np.ones((X.shape[0], 1))))
M = X.dot(self.Theta.T)
M = X.dot(self.theta.T)
P = np.exp(M - np.max(M, axis=1)[:, None])
P /= np.sum(P, axis=1)[:, None]
return P
@@ -119,7 +122,6 @@ def numerical_grad(f, params, e=1e-4):
return grad

d = Orange.data.Table('iris')
m = SoftmaxRegressionLearner(lambda_=1.0)

# gradient check
m = SoftmaxRegressionLearner(lambda_=1.0)
@@ -132,11 +134,3 @@ def numerical_grad(f, params, e=1e-4):

print(ga)
print(gn)

# for lambda_ in [0.1, 0.3, 1, 3, 10]:
# m = SoftmaxRegressionLearner(lambda_=lambda_)
# scores = []
# for tr_ind, te_ind in StratifiedKFold(d.Y.ravel()):
# s = np.mean(m(d[tr_ind])(d[te_ind]) == d[te_ind].Y.ravel())
# scores.append(s)
# print('{:4.1f} {}'.format(lambda_, np.mean(scores)))
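For context, the objective that `cost_grad` evaluates is the standard L2-regularised softmax cross-entropy; the changes to it in this PR are only renames (`Theta` → `theta`), so its numerical behaviour is unchanged. Written in the notation of the code, with $N$ samples, $K$ classes and one-hot targets $Y$ (the row-wise maximum of $X\theta^\top$ is subtracted before exponentiation for numerical stability):

$$
P_{ik} = \frac{\exp(x_i^\top \theta_k)}{\sum_{j=1}^{K} \exp(x_i^\top \theta_j)}, \qquad
J(\theta) = \frac{1}{N}\Big(-\sum_{i=1}^{N}\sum_{k=1}^{K} Y_{ik}\log P_{ik} + \frac{\lambda}{2}\lVert\theta\rVert_F^2\Big), \qquad
\nabla_\theta J = \frac{1}{N}\big((P - Y)^\top X + \lambda\,\theta\big).
$$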
4 changes: 0 additions & 4 deletions Orange/tests/test_classification.py
@@ -214,10 +214,6 @@ def test_result_shape(self):
"""
iris = Table('iris')
for learner in all_learners():
# TODO: Softmax Regression will be fixed as a separate PR
if learner is SoftmaxRegressionLearner:
continue

with self.subTest(learner.__name__):
# model trained on only one value (but three in the domain)
try:
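With the skip removed, the generic shape test now covers softmax regression as well. A sketch of the kind of check it performs (simplified; assumes `SoftmaxRegressionLearner` is importable from `Orange.classification` and that the first 50 iris rows share a single class value):

```python
from Orange.classification import SoftmaxRegressionLearner
from Orange.data import Table

iris = Table("iris")
# Train on a slice that contains only one of the three class values.
model = SoftmaxRegressionLearner(lambda_=1.0)(iris[:50])
probs = model(iris, ret=model.Probs)
# After this fix, probabilities span all three domain values even though
# only one value appeared in the training data.
assert probs.shape == (len(iris), 3)
```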
