Updated default grid-search parameters and relaxed the reference-set size check

MEDomics-UdeS · Aug 31, 2024 · 4c947da
1 parent 809763e
commit 4c947da
Show file tree

Hide file tree

Showing 3 changed files with 6 additions and 6 deletions.
diff --git a/MED3pa/detectron/experiment.py b/MED3pa/detectron/experiment.py
@@ -200,7 +200,7 @@ def run(datasets: DatasetsManager,
         _, y_reference = datasets.get_dataset_by_type(dataset_type="reference")
         reference_size = len(y_reference)
 
-        if reference_size <= samples_size: 
+        if reference_size < samples_size: 
             print("The reference set must be larger than the sample size, ", reference_size,", ", samples_size)
             raise ValueError("Not enough samples in the reference set!")
         if reference_size < 2 * samples_size:

diff --git a/MED3pa/models/dtr_params.py b/MED3pa/models/dtr_params.py
@@ -14,9 +14,9 @@
 dtr_gridsearch_params = [
     {"name": "criterion", "type": "string", "default": ["squared_error", "friedman_mse", "absolute_error", "poisson"], "description": "The function to measure the quality of a split."},
     {"name": "splitter", "type": "string", "default": ["best", "random"], "description": "The strategy used to choose the split at each node."},
-    {"name": "max_depth", "type": "int", "default": [2, 3, 4, 5, 6], "description": "The maximum depth of the tree. Increasing this value will make the model more complex."},
+    {"name": "max_depth", "type": "int", "default": [2, 3, 4], "description": "The maximum depth of the tree. Increasing this value will make the model more complex."},
     {"name": "min_samples_split", "type": "int", "default": [2, 5, 10], "description": "The minimum number of samples required to split an internal node."},
-    {"name": "min_samples_leaf", "type": "int", "default": [1, 2, 4], "description": "The minimum number of samples required to be at a leaf node."},
+    {"name": "min_samples_leaf", "type": "int", "default": [5, 10, 15, 20], "description": "The minimum number of samples required to be at a leaf node."},
     {"name": "min_weight_fraction_leaf", "type": "float", "default": [0.0, 0.1, 0.2], "description": "The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node."},
     {"name": "max_features", "type": "string", "default": ["auto", "sqrt", "log2"], "description": "The number of features to consider when looking for the best split."},
     {"name": "max_leaf_nodes", "type": "int", "default": [10, 20, 30], "description": "Grow trees with max_leaf_nodes in best-first fashion. Best nodes are defined as relative reduction in impurity."},

diff --git a/MED3pa/models/rfr_params.py b/MED3pa/models/rfr_params.py
@@ -19,10 +19,10 @@
 ]
 
 rfr_gridsearch_params = [
-    {"name": "n_estimators", "type": "int", "default": [100, 200, 300, 400, 500], "description": "The number of trees in the forest."},  
-    {"name": "max_depth", "type": "int", "default": [2, 3, 4, 5, 6], "description": "The maximum depth of the tree. Increasing this value will make the model more complex."},  
+    {"name": "n_estimators", "type": "int", "default": [10, 20, 50, 100, 200], "description": "The number of trees in the forest."},  
+    {"name": "max_depth", "type": "int", "default": [2, 3, 4], "description": "The maximum depth of the tree. Increasing this value will make the model more complex."},  
     {"name": "min_samples_split", "type": "int", "default": [2, 5, 10], "description": "The minimum number of samples required to split an internal node."},  
-    {"name": "min_samples_leaf", "type": "int", "default": [1, 2, 4], "description": "The minimum number of samples required to be at a leaf node."},  
+    {"name": "min_samples_leaf", "type": "int", "default": [5, 10, 15, 20], "description": "The minimum number of samples required to be at a leaf node."},  
     {"name": "max_features", "type": "string", "default": ["auto", "sqrt", "log2"], "description": "The number of features to consider when looking for the best split."},  
     {"name": "bootstrap", "type": "bool", "default": [True, False], "description": "Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree."}  
 ]