diff --git a/Orange/base.py b/Orange/base.py
index 3566485a4f6..8a9cbf9d9c0 100644
--- a/Orange/base.py
+++ b/Orange/base.py
@@ -206,15 +206,12 @@ class Model(Reprable):
     ValueProbs = 2
 
     def __init__(self, domain=None, original_domain=None):
-        if isinstance(self, Learner):
-            domain = None
-        elif domain is None:
-            raise ValueError("unspecified domain")
         self.domain = domain
         if original_domain is not None:
             self.original_domain = original_domain
         else:
             self.original_domain = domain
+        self.used_vals = None
 
     def predict(self, X):
         if type(self).predict_storage is Model.predict_storage:
@@ -383,6 +380,30 @@ def one_hot_probs(value):
                 probs[:, i, :] = one_hot(value[:, i])
             return probs
 
+        def extend_probabilities(probs):
+            """
+            Since SklModels and models implementing `fit` and not `fit_storage`
+            do not guarantee correct prediction dimensionality, extend
+            dimensionality of probabilities when it does not match the number
+            of values in the domain.
+            """
+            class_vars = self.domain.class_vars
+            max_values = max(len(cv.values) for cv in class_vars)
+            if max_values == probs.shape[-1]:
+                return probs
+
+            if not self.supports_multiclass:
+                probs = probs[:, np.newaxis, :]
+
+            probs_ext = np.zeros((len(probs), len(class_vars), max_values))
+            for c, used_vals in enumerate(self.used_vals):
+                for i, cv in enumerate(used_vals):
+                    probs_ext[:, c, cv] = probs[:, c, i]
+
+            if not self.supports_multiclass:
+                probs_ext = probs_ext[:, 0, :]
+            return probs_ext
+
         def fix_dim(x):
             return x[0] if one_d else x
 
@@ -439,6 +460,7 @@ def fix_dim(x):
         if probs is None and (ret != Model.Value or backmappers is not None):
             probs = one_hot_probs(value)
         if probs is not None:
+            probs = extend_probabilities(probs)
             probs = self.backmap_probs(probs, n_values, backmappers)
         if ret != Model.Probs:
             if value is None:
diff --git a/Orange/classification/base_classification.py b/Orange/classification/base_classification.py
index daca3c09546..38608325da6 100644
--- a/Orange/classification/base_classification.py
+++ b/Orange/classification/base_classification.py
@@ -1,5 +1,3 @@
-import numpy as np
-
 from Orange.base import Learner, Model, SklLearner, SklModel
 
 __all__ = ["LearnerClassification", "ModelClassification",
@@ -18,26 +16,7 @@ class ModelClassification(Model):
 
 
 class SklModelClassification(SklModel, ModelClassification):
-    def predict(self, X):
-        prediction = super().predict(X)
-        if not isinstance(prediction, tuple):
-            return prediction
-        values, probs = prediction
-
-        class_vars = self.domain.class_vars
-        max_values = max(len(cv.values) for cv in class_vars)
-        if max_values == probs.shape[-1]:
-            return values, probs
-
-        if not self.supports_multiclass:
-            probs = probs[:, np.newaxis, :]
-        probs_ext = np.zeros((len(probs), len(class_vars), max_values))
-        for c, used_vals in enumerate(self.used_vals):
-            for i, cv in enumerate(used_vals):
-                probs_ext[:, c, cv] = probs[:, c, i]
-        if not self.supports_multiclass:
-            probs_ext = probs_ext[:, 0, :]
-        return values, probs_ext
+    pass
 
 
 class SklLearnerClassification(SklLearner, LearnerClassification):
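The `extend_probabilities` helper above reproduces, in `Model.__call__`, the padding that `SklModelClassification.predict` used to do: when a model was fitted on data containing only a subset of the class values declared in the domain, its raw probabilities have fewer columns than the domain expects, and `used_vals` records which domain value each column corresponds to. A minimal NumPy sketch of that index mapping for a single-target model (the array values and the two-of-three-classes setup below are made up for illustration, not taken from the patch):

    import numpy as np

    # Hypothetical raw output of a model that saw only values 0 and 2 of a
    # three-valued target: two probability columns per row.
    raw_probs = np.array([[0.9, 0.1],
                          [0.3, 0.7]])
    used_vals = [np.array([0, 2])]   # one entry per class variable
    n_values = 3                     # len(class_var.values) in the domain

    # Same mapping as extend_probabilities: column i is written into the
    # slot of the domain value the model was actually trained on.
    probs_ext = np.zeros((len(raw_probs), len(used_vals), n_values))
    for c, vals in enumerate(used_vals):
        for i, v in enumerate(vals):
            probs_ext[:, c, v] = raw_probs[:, i]
    probs_ext = probs_ext[:, 0, :]   # single target: drop the class axis

    print(probs_ext)                 # [[0.9 0.  0.1]
                                     #  [0.3 0.  0.7]]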
diff --git a/Orange/classification/softmax_regression.py b/Orange/classification/softmax_regression.py
index 02f7da8a64a..1efc5444bde 100644
--- a/Orange/classification/softmax_regression.py
+++ b/Orange/classification/softmax_regression.py
@@ -29,9 +29,10 @@ class SoftmaxRegressionLearner(Learner):
         parameters to be smaller.
 
     preprocessors : list, optional
-        Preprocessors are applied to data before training or testing. Default preprocessors:
-        Defaults to
-        `[RemoveNaNClasses(), RemoveNaNColumns(), Impute(), Continuize(), Normalize()]`
+        Preprocessors are applied to data before training or testing. Default
+        preprocessors:
+        `[RemoveNaNClasses(), RemoveNaNColumns(), Impute(), Continuize(),
+        Normalize()]`
 
         - remove columns with all values as NaN
         - replace NaN values with suitable values
@@ -52,53 +53,55 @@ def __init__(self, lambda_=1.0, preprocessors=None, **fmin_args):
         super().__init__(preprocessors=preprocessors)
         self.lambda_ = lambda_
         self.fmin_args = fmin_args
+        self.num_classes = None
 
-    def cost_grad(self, Theta_flat, X, Y):
-        Theta = Theta_flat.reshape((self.num_classes, X.shape[1]))
+    def cost_grad(self, theta_flat, X, Y):
+        theta = theta_flat.reshape((self.num_classes, X.shape[1]))
 
-        M = X.dot(Theta.T)
+        M = X.dot(theta.T)
         P = np.exp(M - np.max(M, axis=1)[:, None])
         P /= np.sum(P, axis=1)[:, None]
 
         cost = -np.sum(np.log(P) * Y)
-        cost += self.lambda_ * Theta_flat.dot(Theta_flat) / 2.0
+        cost += self.lambda_ * theta_flat.dot(theta_flat) / 2.0
         cost /= X.shape[0]
 
         grad = X.T.dot(P - Y).T
-        grad += self.lambda_ * Theta
+        grad += self.lambda_ * theta
         grad /= X.shape[0]
 
         return cost, grad.ravel()
 
-    def fit(self, X, y, W):
-        if len(y.shape) > 1:
+    def fit(self, X, Y, W=None):
+        if len(Y.shape) > 1:
             raise ValueError('Softmax regression does not support '
                              'multi-label classification')
 
-        if np.isnan(np.sum(X)) or np.isnan(np.sum(y)):
+        if np.isnan(np.sum(X)) or np.isnan(np.sum(Y)):
             raise ValueError('Softmax regression does not support '
                              'unknown values')
 
         X = np.hstack((X, np.ones((X.shape[0], 1))))
 
-        self.num_classes = np.unique(y).size
-        Y = np.eye(self.num_classes)[y.ravel().astype(int)]
+        self.num_classes = np.unique(Y).size
+        Y = np.eye(self.num_classes)[Y.ravel().astype(int)]
 
         theta = np.zeros(self.num_classes * X.shape[1])
         theta, j, ret = fmin_l_bfgs_b(self.cost_grad, theta,
                                       args=(X, Y), **self.fmin_args)
-        Theta = theta.reshape((self.num_classes, X.shape[1]))
+        theta = theta.reshape((self.num_classes, X.shape[1]))
 
-        return SoftmaxRegressionModel(Theta)
+        return SoftmaxRegressionModel(theta)
 
 
 class SoftmaxRegressionModel(Model):
-    def __init__(self, Theta):
-        self.Theta = Theta
+    def __init__(self, theta):
+        super().__init__()
+        self.theta = theta
 
     def predict(self, X):
         X = np.hstack((X, np.ones((X.shape[0], 1))))
-        M = X.dot(self.Theta.T)
+        M = X.dot(self.theta.T)
         P = np.exp(M - np.max(M, axis=1)[:, None])
         P /= np.sum(P, axis=1)[:, None]
         return P
@@ -119,7 +122,6 @@ def numerical_grad(f, params, e=1e-4):
         return grad
 
     d = Orange.data.Table('iris')
-    m = SoftmaxRegressionLearner(lambda_=1.0)
 
     # gradient check
     m = SoftmaxRegressionLearner(lambda_=1.0)
@@ -132,11 +134,3 @@ def numerical_grad(f, params, e=1e-4):
 
     print(ga)
     print(gn)
-
-# for lambda_ in [0.1, 0.3, 1, 3, 10]:
-#     m = SoftmaxRegressionLearner(lambda_=lambda_)
-#     scores = []
-#     for tr_ind, te_ind in StratifiedKFold(d.Y.ravel()):
-#         s = np.mean(m(d[tr_ind])(d[te_ind]) == d[te_ind].Y.ravel())
-#         scores.append(s)
-#     print('{:4.1f} {}'.format(lambda_, np.mean(scores)))
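With the per-model workaround removed and the extension handled centrally in `Model.__call__`, softmax regression behaves like the other classifiers in the shape test below. A rough usage sketch of what that test now expects (paraphrased from the test, not part of the patch; it assumes the learner machinery fills in `used_vals` during fitting, which this excerpt only initialises to None):

    from Orange.data import Table
    from Orange.classification.softmax_regression import SoftmaxRegressionLearner

    iris = Table("iris")
    # The first 100 rows contain only two of the three iris classes.
    model = SoftmaxRegressionLearner(lambda_=1.0)(iris[:100])

    # Probabilities are padded to all three values declared in the domain,
    # matching what test_result_shape asserts for every learner.
    probs = model(iris[:50], ret=model.Probs)
    print(probs.shape)   # expected (50, 3)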
diff --git a/Orange/tests/test_classification.py b/Orange/tests/test_classification.py
index 189a229c5cc..54d3b86e61d 100644
--- a/Orange/tests/test_classification.py
+++ b/Orange/tests/test_classification.py
@@ -214,10 +214,6 @@ def test_result_shape(self):
         """
         iris = Table('iris')
         for learner in all_learners():
-            # TODO: Softmax Regression will be fixed as a separate PR
-            if learner is SoftmaxRegressionLearner:
-                continue
-
             with self.subTest(learner.__name__):
                 # model trained on only one value (but three in the domain)
                 try: