Add testing for consistency and optimality #17

Merged · 36 commits · Aug 13, 2023 · changes from all commits

Commits
45904d6  minor fix for leaf class inheritance (ThebTron, Jul 7, 2023)
c7cd995  update interface for supported tree libraries (ThebTron, Jul 7, 2023)
cc31a31  bug-fix: additional constraint needed for penalty mode (ThebTron, Jul 7, 2023)
afc6046  improve and extend model predict options (ThebTron, Jul 7, 2023)
28a2c91  add more info and results to gurobi optimizer output (ThebTron, Jul 7, 2023)
e1a67aa  add simpler single objective benchmark and allow for non-categorical … (ThebTron, Jul 7, 2023)
93f20da  add full bound output function for easier usage (ThebTron, Jul 7, 2023)
9b57eb0  add tests to check if gurobi solution is similar to approximated samp… (ThebTron, Jul 7, 2023)
6341e4f  add tests to check if gurobi optimization model implementation is sim… (ThebTron, Jul 7, 2023)
b295af1  updated OptResult structure according to gurobipy version (spiralulam, Jul 26, 2023)
ecf8804  bugfix according to bugfix in gurobipy version (spiralulam, Aug 7, 2023)
4a687b2  Only run test_curr in pipeline (spiralulam, Aug 7, 2023)
1a7c3e8  Adjusted tolerance levels (spiralulam, Aug 7, 2023)
16313f7  switched to Python 3.10 (spiralulam, Aug 7, 2023)
d1d6826  disabled verbose_eval since this caused an error in pipeline (spiralulam, Aug 7, 2023)
7712eeb  Increased some tolerances a little bit, such that the tests would als… (spiralulam, Aug 8, 2023)
a8a7211  Increased some tolerances a little bit, such that the tests would als… (spiralulam, Aug 10, 2023)
6e2e19d  for now, run only consistency gurobi tests (spiralulam, Aug 10, 2023)
5823b85  new try to get tests run in pipeline (spiralulam, Aug 10, 2023)
712d5b3  avoid to have two simultaneous test runs (spiralulam, Aug 10, 2023)
51a4366  run only fast tests in pipeline (spiralulam, Aug 10, 2023)
d6c8222  locally generated coverage file (spiralulam, Aug 10, 2023)
7c92205  Update README.md (spiralulam, Aug 10, 2023)
d4a0e2d  Update README.md (spiralulam, Aug 10, 2023)
4a83c45  update test coverage (spiralulam, Aug 10, 2023)
ae4bfd1  Merge remote-tracking branch 'origin/fix-consistency-tests' into fix-… (spiralulam, Aug 10, 2023)
1643a85  bugfix for exploration case (spiralulam, Aug 12, 2023)
18daaf9  Added consistency tests for Pyomo (spiralulam, Aug 12, 2023)
74467c8  bug fix, added sum binaries equals one constraint for penalty case (spiralulam, Aug 12, 2023)
78c37d7  changed absolute tolerance to relative tolerance, other minor issues (spiralulam, Aug 12, 2023)
f1b7736  renamed file (spiralulam, Aug 12, 2023)
52d9729  created optimality tests for Pyomo (spiralulam, Aug 12, 2023)
747c1da  small adjustments (spiralulam, Aug 12, 2023)
835630d  small adjustments (spiralulam, Aug 12, 2023)
0bb2f45  added time limit (spiralulam, Aug 13, 2023)
7d4a76d  new test run (spiralulam, Aug 13, 2023)
Files changed
Binary file added .coverage
Binary file not shown.
10 changes: 3 additions & 7 deletions .github/workflows/python-package.yml
@@ -5,9 +5,7 @@ name: Tests

 on:
   push:
-    branches: [ "master", "entmoot-v2"]
-  pull_request:
-    branches: [ "master" ]
+    branches: [ "master", "fix-consistency-tests"]

 jobs:
   build:
@@ -16,7 +14,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.11"]
+        python-version: ["3.10"]

     steps:
       - uses: actions/checkout@v3
@@ -41,6 +39,4 @@ jobs:
          CICD_ACTIVE: ${{ secrets.CICD_ACTIVE }}
          GRB_LICENSE_FILE: ${{ steps.write-license.outputs.grb_license_file }}
        run: |
-         pytest --cov
-      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v3
+         pytest -v -m fast_test
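
The workflow now selects tests by marker instead of running the full suite with coverage upload. For `pytest -v -m fast_test` to collect anything, the marker has to be applied to the relevant tests and registered with pytest; a minimal sketch of the registration, with a conventional file name assumed (this diff does not show where the marker is actually registered):

```python
# conftest.py (or the markers section of pytest.ini / pyproject.toml):
# register the custom marker so `pytest -m fast_test` does not warn about it
def pytest_configure(config):
    config.addinivalue_line(
        "markers", "fast_test: quick tests that are safe to run in the pipeline"
    )
```

Individual tests then opt in with the `@pytest.mark.fast_test` decorator, as in the sketch after the commit list above.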
3 changes: 1 addition & 2 deletions README.md
@@ -1,7 +1,6 @@
 [![Tests](https://github.com/cog-imperial/entmoot/actions/workflows/python-package.yml/badge.svg?branch=entmoot-v2)](https://github.com/cog-imperial/entmoot/actions/workflows/python-package.yml)
-[![codecov](https://codecov.io/gh/cog-imperial/entmoot/branch/entmoot-v2/graph/badge.svg)](https://codecov.io/gh/cog-imperial/entmoot)
+[![coverage](https://github.com/cog-imperial/entmoot/blob/fix-consistency-tests/coverage.svg)]([https://codecov.io/gh/cog-imperial/entmoot](https://github.com/cog-imperial/entmoot/blob/fix-consistency-tests/coverage.svg))
-![Python versions](https://img.shields.io/badge/python-3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)
 [![Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 [![Read the Docs](https://readthedocs.org/projects/entmoot/badge/?version=latest)](https://entmoot.readthedocs.io/en/latest/)

 <img src="media/entmoot_logo.png" width="400">
21 changes: 21 additions & 0 deletions coverage.svg
91 changes: 85 additions & 6 deletions entmoot/benchmarks.py
@@ -4,14 +4,84 @@
 from entmoot import ProblemConfig


-def build_multi_obj_categorical_problem(problem_config: ProblemConfig, n_obj: int = 2):
+def build_small_single_obj_categorical_problem(
+    problem_config: ProblemConfig, no_cat=False
+):
     """
     Builds a small test example which is frequently used by the tests.
     :param problem_config: ProblemConfig object where features and objectives are added
     :param n_obj: Number of objectives
     :return: None, the problem definition happens "inplace"
     """
-    problem_config.add_feature("categorical", ("blue", "orange", "gray"))
+    if not no_cat:
+        problem_config.add_feature("categorical", ("blue", "orange", "gray"))
     problem_config.add_feature("integer", (5, 6))
     problem_config.add_feature("binary")
     problem_config.add_feature("real", (5.0, 6.0))
+
+    problem_config.add_min_objective()
+
+
+def eval_small_single_obj_cat_testfunc(X: ArrayLike, no_cat=False) -> np.ndarray:
+    """
+    Benchmark function with at least four input variables and one output
+    :param X: Usually a numpy array or a list of tuples. Each row (or tuple) consists of at least four entries with the
+    following properties:
+    - The first one is a categorical variable with the possible values "blue", "orange" and "gray".
+    - The second one is an integer variable.
+    - The third one is a binary variable.
+    - The fourth one and all additional variables are real numbers.
+    :param no_cat: if True, the categorical value is ignored and the "blue" formula is used for every row
+    :return: objective value for each data point
+    """
+
+    # without the dtype=object parameter, each entry of X is converted into a string
+    X = np.array(X, dtype=object)
+
+    def compute_objectives(xi: Iterable, no_cat=False):
+        if no_cat:
+            return (
+                xi[1] * xi[2] * np.sin(sum(xi[3:]))
+                + xi[1] * (1 - xi[2]) * np.cos(sum(xi[3:])),
+                xi[1] * xi[2] * sum(xi[3:]) - xi[1] * (1 - xi[2]) * sum(xi[3:]),
+            )
+
+        if xi[0] == "blue":
+            return (
+                xi[1] * xi[2] * np.sin(sum(xi[3:]))
+                + xi[1] * (1 - xi[2]) * np.cos(sum(xi[3:])),
+                xi[1] * xi[2] * sum(xi[3:]) - xi[1] * (1 - xi[2]) * sum(xi[3:]),
+            )
+        elif xi[0] == "orange":
+            return (
+                xi[1] * xi[2] * (sum(xi[3:]) / len(xi[3:])) ** 2
+                + xi[1] * (1 - xi[2]) * (sum(xi[3:]) / len(xi[3:])) ** 3,
+                np.sqrt(
+                    abs(xi[1] * xi[2] * sum(xi[3:]) + xi[1] * (1 - xi[2]) * sum(xi[3:]))
+                ),
+            )
+        elif xi[0] == "gray":
+            return (xi[2] * xi[3] ** xi[1], -(1 - xi[2]) * xi[3] ** xi[1])
+        else:
+            raise IOError(
+                f"You provided the illegal value {xi[0]} for the categorical variable. Allowed values are "
+                f"'blue', 'orange' and 'gray'"
+            )
+
+    return np.array([[sum(compute_objectives(xi, no_cat=no_cat)) / 2] for xi in X])
+
+
+def build_multi_obj_categorical_problem(
+    problem_config: ProblemConfig, n_obj: int = 2, no_cat=False
+):
+    """
+    Builds a small test example which is frequently used by the tests.
+    :param problem_config: ProblemConfig object where features and objectives are added
+    :param n_obj: Number of objectives
+    :return: None, the problem definition happens "inplace"
+    """
+    if not no_cat:
+        problem_config.add_feature("categorical", ("blue", "orange", "gray"))
+    problem_config.add_feature("integer", (5, 6))
+    problem_config.add_feature("binary")
+    problem_config.add_feature("real", (5.0, 6.0))
@@ -22,7 +92,9 @@ def build_multi_obj_categorical_problem(problem_config: ProblemConfig, n_obj: in
     problem_config.add_min_objective()


-def eval_multi_obj_cat_testfunc(X: ArrayLike, n_obj: int = 2) -> np.ndarray:
+def eval_multi_obj_cat_testfunc(
+    X: ArrayLike, n_obj: int = 2, no_cat=False
+) -> np.ndarray:
     """
     Benchmark function with at least four input variables and one or two outputs
     :param X: Usually a numpy array or a list of tuples. Each row (or tuple) consists of at least four entries with the
@@ -38,7 +110,14 @@ def eval_multi_obj_cat_testfunc(X: ArrayLike, n_obj: int = 2) -> np.ndarray:
     # without the dtype=object parameter, each entry of X is converted into a string
     X = np.array(X, dtype=object)

-    def compute_objectives(xi: Iterable):
+    def compute_objectives(xi: Iterable, no_cat=False):
+        if no_cat:
+            return (
+                xi[1] * xi[2] * np.sin(sum(xi[3:]))
+                + xi[1] * (1 - xi[2]) * np.cos(sum(xi[3:])),
+                xi[1] * xi[2] * sum(xi[3:]) - xi[1] * (1 - xi[2]) * sum(xi[3:]),
+            )
+
         if xi[0] == "blue":
             return (
                 xi[1] * xi[2] * np.sin(sum(xi[3:]))
@@ -62,9 +141,9 @@ def compute_objectives(xi: Iterable):
             )

     if n_obj == 2:
-        return np.array([compute_objectives(xi) for xi in X])
+        return np.array([compute_objectives(xi, no_cat=no_cat) for xi in X])
     elif n_obj == 1:
-        return np.array([[sum(compute_objectives(xi)) / 2] for xi in X])
+        return np.array([[sum(compute_objectives(xi, no_cat=no_cat)) / 2] for xi in X])
     else:
         raise IOError(
             f"You provided the illegal value {n_obj} for the number of objectives. "
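
For orientation, the new single-objective helpers might be used as follows. This is a sketch based only on the signatures above; the `ProblemConfig` constructor argument is an assumption (the `problem_config.rnd_seed` attribute is read in tree_ensemble.py below):

```python
from entmoot import ProblemConfig
from entmoot.benchmarks import (
    build_small_single_obj_categorical_problem,
    eval_small_single_obj_cat_testfunc,
)

problem_config = ProblemConfig(rnd_seed=73)  # rnd_seed argument assumed
build_small_single_obj_categorical_problem(problem_config)  # adds features in place

# rows follow the docstring layout: (categorical, integer, binary, real)
X = [("blue", 5, 1, 5.5), ("gray", 6, 0, 5.2)]
y = eval_small_single_obj_cat_testfunc(X)
print(y.shape)  # (2, 1): one averaged objective value per row
```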
14 changes: 9 additions & 5 deletions entmoot/models/enting.py
@@ -48,7 +48,6 @@ class Enting(BaseModel):
     """

     def __init__(self, problem_config: ProblemConfig, params: dict = None):
-
         if params is None:
             params = {}

@@ -111,12 +110,17 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> None:
         self.mean_model.fit(X, y)
         self.unc_model.fit(X, y)

-    def predict(self, X: np.ndarray) -> list:
+    def leaf_bnd_predict(self, obj_name, leaf_enc):
+        bnds = self._problem_config.get_enc_bnd()
+        return self.mean_model.meta_tree_dict[obj_name].prune_var_bnds(leaf_enc, bnds)
+
+    def predict(self, X: np.ndarray, is_enc=False) -> list:
         """
         Computes prediction value of tree model for X.
         """
         # encode categorical features
-        X = self._problem_config.encode(X)
+        if not is_enc:
+            X = self._problem_config.encode(X)

         # check dims of X
         if X.ndim == 1:
@@ -136,12 +140,12 @@ def predict_pareto(self):
     def predict_pareto(self):
         pass

-    def predict_acq(self, X: np.ndarray) -> list:
+    def predict_acq(self, X: np.ndarray, is_enc=False) -> list:
         """
         predicts value of acquisition function (which contains not only the mean value but also the uncertainty)
         """
         acq_pred = []
-        comb_pred = self.predict(X)
+        comb_pred = self.predict(X, is_enc=is_enc)
         for mean, unc in comb_pred:
             acq_pred.append(mean + self._beta * unc)
         return acq_pred
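
The new `is_enc` flag lets callers that already operate in the encoded feature space (such as the solver-side tests this PR adds) skip the second encoding pass, and `leaf_bnd_predict` exposes the variable bounds of a given leaf. A sketch of the call pattern, assuming a fitted `Enting` instance named `model`; the access to the private `_problem_config` mirrors what `predict` itself does, and the objective name and leaf encoding are illustrative assumptions:

```python
# raw inputs: categorical entries are strings and get encoded inside predict
mean, unc = model.predict([("blue", 5, 1, 5.5)])[0]

# pre-encoded inputs: encode once, then tell predict/predict_acq to skip encoding
X_enc = model._problem_config.encode([("blue", 5, 1, 5.5)])
mean, unc = model.predict(X_enc, is_enc=True)[0]
acq = model.predict_acq(X_enc, is_enc=True)[0]  # equals mean + beta * unc

# bounds of the box a leaf encoding maps to, per objective ("obj_0" assumed)
bnds = model.leaf_bnd_predict("obj_0", "010")
```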
22 changes: 12 additions & 10 deletions entmoot/models/mean_models/meta_tree_ensemble.py
@@ -160,28 +160,30 @@ def _prune_var_bnds(self, curr_depth, leaf_enc, var_bnds):
         if isinstance(self.split_code_pred, list):
             # categorical variable
             cat_set = set(self.split_code_pred)
-            if leaf_enc[curr_depth] == '0':
-                var_bnds[self.split_var] = \
-                    set(var_bnds[self.split_var]).intersection(cat_set)
+            if leaf_enc[curr_depth] == "0":
+                var_bnds[self.split_var] = set(
+                    var_bnds[self.split_var]
+                ).intersection(cat_set)
                 self.left._prune_var_bnds(curr_depth + 1, leaf_enc, var_bnds)
             else:
-                var_bnds[self.split_var] = \
-                    set(var_bnds[self.split_var]).difference(cat_set)
+                var_bnds[self.split_var] = set(var_bnds[self.split_var]).difference(
+                    cat_set
+                )
                 self.right._prune_var_bnds(curr_depth + 1, leaf_enc, var_bnds)
         else:
             # continuous variable
             lb, ub = var_bnds[self.split_var]
-            if leaf_enc[curr_depth] == '0':
-                ub = min(ub,self.split_code_pred)
+            if leaf_enc[curr_depth] == "0":
+                ub = min(ub, self.split_code_pred)
                 var_bnds[self.split_var] = (lb, ub)
                 self.left._prune_var_bnds(curr_depth + 1, leaf_enc, var_bnds)
-            else: # if value is '1'
-                lb = max(lb,self.split_code_pred)
+            else:  # if value is '1'
+                lb = max(lb, self.split_code_pred)
                 var_bnds[self.split_var] = (lb, ub)
                 self.right._prune_var_bnds(curr_depth + 1, leaf_enc, var_bnds)


-class LeafNode(TreeType):
+class LeafNode(TreeNode):
     def __init__(self, split_code_pred):
         self.split_var = -1
         self.split_code_pred = split_code_pred
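
The pruning logic above walks from the root along a leaf encoding, a string of '0'/'1' choices ('0' means the left, less-or-equal branch; '1' the right branch), and tightens each split variable's interval accordingly; for categorical splits the same walk intersects with or subtracts the split's category set instead. A self-contained illustration of the continuous case on a two-split path, independent of the entmoot classes:

```python
# standalone illustration: prune variable bounds along the leaf encoding "01",
# i.e. go left at the root split, then right at the next split
var_bnds = {0: (5.0, 6.0)}  # feature 0 starts with its full interval

splits = [(0, 5.8), (0, 5.3)]  # (split variable, threshold) along the path
leaf_enc = "01"

for (var, thr), branch in zip(splits, leaf_enc):
    lb, ub = var_bnds[var]
    if branch == "0":  # left branch: value <= threshold, so the upper bound shrinks
        var_bnds[var] = (lb, min(ub, thr))
    else:  # right branch: value > threshold, so the lower bound grows
        var_bnds[var] = (max(lb, thr), ub)

print(var_bnds)  # {0: (5.3, 5.8)}: the box that maps to this leaf
```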
13 changes: 5 additions & 8 deletions entmoot/models/mean_models/tree_ensemble.py
@@ -7,17 +7,15 @@

 class TreeEnsemble(BaseModel):
     def __init__(self, problem_config, params=None):
-
         if params is None:
             params = {}

         self._problem_config = problem_config
         self._train_lib = params.get("train_lib", "lgbm")
         self._rnd_seed = problem_config.rnd_seed

-        assert self._train_lib in ("lgbm", "catboost", "xgboost"), (
-            "Parameter 'train_lib' for tree ensembles needs to be "
-            "in '('lgbm', 'catboost', 'xgboost')'."
+        assert self._train_lib in ("lgbm",), (
+            "Parameter 'train_lib' for tree ensembles needs to be in '('lgbm')'."
         )

         if "train_params" not in params:
@@ -113,14 +111,14 @@ def _train_lgbm(self, X, y):
                 self._train_params,
                 train_data,
                 categorical_feature=self._problem_config.cat_idx,
-                verbose_eval=False,
+                # verbose_eval=False,
             )
         else:
             # train for non-categorical vars
             train_data = lgb.Dataset(X, label=y, params={"verbose": -1})

             tree_model = lgb.train(
-                self._train_params, train_data, verbose_eval=False
+                self._train_params, train_data  # , verbose_eval=False
             )
         return tree_model

@@ -136,7 +134,6 @@ def _update_meta_tree_dict(self):

         # get model information
         for obj in self._problem_config.obj_list:
-
             if self._train_lib == "lgbm":
                 lib_out = self.tree_dict[obj.name].dump_model()
             elif self._train_lib == "catboost":
@@ -146,7 +143,7 @@ def _update_meta_tree_dict(self):
             else:
                 raise IOError(
                     "Parameter 'train_lib' for tree ensembles needs to be "
-                    "in '('lgbm', 'catboost', 'xgboost')'."
+                    "in '('lgbm')'."
                 )

             # order tree_model_dict
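
The commented-out `verbose_eval` keyword reflects its removal from `lgb.train` in newer LightGBM releases (see the "disabled verbose_eval since this caused an error in pipeline" commit); silencing per-iteration logs is done through callbacks instead. A sketch of the replacement call, assuming LightGBM 3.3 or later and reusing the `train_params`/`train_data` names from the snippet above:

```python
import lightgbm as lgb

# verbose_eval is no longer a lgb.train keyword; the log_evaluation callback
# with period=0 disables per-iteration evaluation logging instead
tree_model = lgb.train(
    train_params,
    train_data,
    callbacks=[lgb.log_evaluation(period=0)],
)
```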