Allow defining of custom splits in a splits.json file (#1046)
* Enable getting custom splits from splits.json file

* Enable custom splits.json in more parts of the devkit

There are still some `create_splits_scenes` call sites left that do not support custom splits.

* Enable filtering the prediction/results by a (custom) split

Previously, only the GT was filtered by split, and the results file was expected to contain exactly the samples of one specific split.

* Keep original load_prediction and load_gt

* restore state from master for panoptic and lidarseg

* mock splits.json file in detection and tracking eval unit tests

* tidy up unit tests

* implement PR feedback comments
michael-hoss authored Apr 2, 2024
1 parent 1b03e7d commit 4df2701
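
In practice, a custom split is just a JSON file mapping split names to lists of scene names. Below is a minimal sketch of creating one; the schema is inferred from the mocked splits file in the unit tests further down, and where the devkit looks this file up is resolved by `nuscenes.utils.splits._get_custom_splits_file_path`, which is not shown in this diff.

```python
import json

# Hypothetical splits.json content. The schema (split name -> list of scene
# names) mirrors the mocked splits file in the detection unit tests below.
custom_splits = {
    "mini_custom_train": ["scene-0061", "scene-0553"],
    "mini_custom_val": ["scene-0103", "scene-0916"],
}

with open("splits.json", "w") as f:
    json.dump(custom_splits, f, indent=2)
```

The evaluators then dispatch between predefined and custom splits via `is_predefined_split`, as the diffs below show.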
Showing 8 changed files with 325 additions and 44 deletions.
163 changes: 159 additions & 4 deletions python-sdk/nuscenes/eval/common/loaders.py
@@ -2,20 +2,19 @@
# Code written by Oscar Beijbom, 2019.

import json
from typing import Dict, Tuple
from typing import Dict, List, Tuple

import numpy as np
import tqdm
from pyquaternion import Quaternion

from nuscenes import NuScenes
from nuscenes.eval.common.data_classes import EvalBoxes
from nuscenes.eval.detection.data_classes import DetectionBox
from nuscenes.eval.detection.utils import category_to_detection_name
from nuscenes.eval.tracking.data_classes import TrackingBox
from nuscenes.utils.data_classes import Box
from nuscenes.utils.geometry_utils import points_in_box
from nuscenes.utils.splits import create_splits_scenes
from nuscenes.utils.splits import create_splits_scenes, get_scenes_of_custom_split
from pyquaternion import Quaternion


def load_prediction(result_path: str, max_boxes_per_sample: int, box_cls, verbose: bool = False) \
@@ -283,3 +282,159 @@ def _get_box_class_field(eval_boxes: EvalBoxes) -> str:
raise Exception('Error: Invalid box type: %s' % box)

return class_field

def load_prediction_of_sample_tokens(result_path: str, max_boxes_per_sample: int, box_cls,
sample_tokens: List[str], verbose: bool = False) \
-> Tuple[EvalBoxes, Dict]:
"""
Loads object predictions from file.
:param result_path: Path to the .json result file provided by the user.
    :param max_boxes_per_sample: Maximum number of boxes allowed per sample.
    :param box_cls: Type of box to load, e.g. DetectionBox or TrackingBox.
    :param sample_tokens: The sample tokens to load predictions for.
    :param verbose: Whether to print messages to stdout.
:return: The deserialized results and meta data.
"""

# Load from file and check that the format is correct.
with open(result_path) as f:
data = json.load(f)
    assert 'results' in data, 'Error: No field `results` in result file. Please note that the result format changed. ' \
                              'See https://www.nuscenes.org/object-detection for more information.'
assert isinstance(data['results'], dict), 'Error: results must be a dict.'

# Filter by sample tokens.
    results_of_split: dict = {sample_token: data['results'][sample_token] for sample_token in sample_tokens}

# Deserialize results and get meta data.
    boxes_of_split: EvalBoxes = EvalBoxes.deserialize(results_of_split, box_cls)
meta = data['meta']
if verbose:
print("Loaded results from {}. Found detections for {} samples."
.format(result_path, len(boxes_of_split.sample_tokens)))

    # Check that each sample has no more than max_boxes_per_sample predicted boxes.
for sample_token in boxes_of_split.sample_tokens:
assert len(boxes_of_split.boxes[sample_token]) <= max_boxes_per_sample, \
"Error: Only <= %d boxes per sample allowed!" % max_boxes_per_sample

return boxes_of_split, meta


def load_gt_of_sample_tokens(nusc: NuScenes, sample_tokens: List[str], box_cls,
verbose: bool = False) -> EvalBoxes:
"""
Loads ground truth boxes from DB.
:param nusc: A NuScenes instance.
    :param sample_tokens: The sample tokens for which to load GT boxes.
:param box_cls: Type of box to load, e.g. DetectionBox or TrackingBox.
:param verbose: Whether to print messages to stdout.
:return: The GT boxes.
"""
# Init.
if box_cls == DetectionBox:
attribute_map = {a['token']: a['name'] for a in nusc.attribute}

all_annotations = EvalBoxes()

    # Load annotations and filter out unused labels.
tracking_id_set = set()
for sample_token in tqdm.tqdm(sample_tokens, leave=verbose):

sample = nusc.get('sample', sample_token)
sample_annotation_tokens = sample['anns']

sample_boxes = []
for sample_annotation_token in sample_annotation_tokens:

sample_annotation = nusc.get('sample_annotation', sample_annotation_token)
if box_cls == DetectionBox:
# Get label name in detection task and filter unused labels.
detection_name = category_to_detection_name(sample_annotation['category_name'])
if detection_name is None:
continue

# Get attribute_name.
attr_tokens = sample_annotation['attribute_tokens']
attr_count = len(attr_tokens)
if attr_count == 0:
attribute_name = ''
elif attr_count == 1:
attribute_name = attribute_map[attr_tokens[0]]
else:
raise Exception('Error: GT annotations must not have more than one attribute!')

sample_boxes.append(
box_cls(
sample_token=sample_token,
translation=sample_annotation['translation'],
size=sample_annotation['size'],
rotation=sample_annotation['rotation'],
velocity=nusc.box_velocity(sample_annotation['token'])[:2],
num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'],
detection_name=detection_name,
detection_score=-1.0, # GT samples do not have a score.
attribute_name=attribute_name
)
)
elif box_cls == TrackingBox:
# Use nuScenes token as tracking id.
tracking_id = sample_annotation['instance_token']
tracking_id_set.add(tracking_id)

# Get label name in detection task and filter unused labels.
# Import locally to avoid errors when motmetrics package is not installed.
from nuscenes.eval.tracking.utils import category_to_tracking_name
tracking_name = category_to_tracking_name(sample_annotation['category_name'])
if tracking_name is None:
continue

sample_boxes.append(
box_cls(
sample_token=sample_token,
translation=sample_annotation['translation'],
size=sample_annotation['size'],
rotation=sample_annotation['rotation'],
velocity=nusc.box_velocity(sample_annotation['token'])[:2],
num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'],
tracking_id=tracking_id,
tracking_name=tracking_name,
tracking_score=-1.0 # GT samples do not have a score.
)
)
else:
raise NotImplementedError('Error: Invalid box_cls %s!' % box_cls)

all_annotations.add_boxes(sample_token, sample_boxes)

if verbose:
print("Loaded ground truth annotations for {} samples.".format(len(all_annotations.sample_tokens)))

return all_annotations

def get_samples_of_custom_split(split_name: str, nusc : NuScenes) -> List[str]:
"""
Returns the sample tokens of a custom/user-defined split.
:param split_name: The name of the custom split.
:param nusc: The NuScenes instance.
:return: The sample tokens of the custom split.
"""

    scenes_of_split: List[str] = get_scenes_of_custom_split(split_name=split_name, nusc=nusc)
    sample_tokens_of_split: List[str] = get_samples_of_scenes(scene_names=scenes_of_split, nusc=nusc)
return sample_tokens_of_split

def get_samples_of_scenes(scene_names: List[str], nusc: NuScenes) -> List[str]:
"""Given a list of scene names, returns the sample tokens of these scenes."""

all_sample_tokens = [s['token'] for s in nusc.sample]
assert len(all_sample_tokens) > 0, "Error: Database has no samples!"

    filtered_sample_tokens: List[str] = []
for sample_token in all_sample_tokens:
scene_token = nusc.get('sample', sample_token)['scene_token']
scene_record = nusc.get('scene', scene_token)
if scene_record['name'] in scene_names:
filtered_sample_tokens.append(sample_token)
return filtered_sample_tokens
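
The new loader helpers compose as follows. A usage sketch: the dataroot, result path, and the box limit of 500 are placeholder assumptions, while the function signatures match the additions above.

```python
from nuscenes import NuScenes
from nuscenes.eval.common.loaders import (
    get_samples_of_custom_split,
    load_gt_of_sample_tokens,
    load_prediction_of_sample_tokens,
)
from nuscenes.eval.detection.data_classes import DetectionBox

# Placeholder dataroot; adjust to your local nuScenes installation.
nusc = NuScenes(version='v1.0-mini', dataroot='/data/nuscenes', verbose=False)

# Resolve the custom split (defined in splits.json) to its sample tokens ...
sample_tokens = get_samples_of_custom_split(split_name='mini_custom_val', nusc=nusc)

# ... then load predictions and ground truth for exactly those samples.
pred_boxes, meta = load_prediction_of_sample_tokens(
    'results.json', max_boxes_per_sample=500, box_cls=DetectionBox,
    sample_tokens=sample_tokens, verbose=True)
gt_boxes = load_gt_of_sample_tokens(nusc, sample_tokens, DetectionBox, verbose=True)
```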
37 changes: 28 additions & 9 deletions python-sdk/nuscenes/eval/detection/evaluate.py
@@ -6,19 +6,31 @@
import os
import random
import time
from typing import Tuple, Dict, Any
from typing import Any, Dict, List, Tuple

import numpy as np

from nuscenes import NuScenes
from nuscenes.eval.common.config import config_factory
from nuscenes.eval.common.data_classes import EvalBoxes
from nuscenes.eval.common.loaders import load_prediction, load_gt, add_center_dist, filter_eval_boxes
from nuscenes.eval.common.loaders import (
add_center_dist,
filter_eval_boxes,
get_samples_of_custom_split,
load_gt,
load_gt_of_sample_tokens,
load_prediction,
load_prediction_of_sample_tokens,
)
from nuscenes.eval.detection.algo import accumulate, calc_ap, calc_tp
from nuscenes.eval.detection.constants import TP_METRICS
from nuscenes.eval.detection.data_classes import DetectionConfig, DetectionMetrics, DetectionBox, \
DetectionMetricDataList
from nuscenes.eval.detection.render import summary_plot, class_pr_curve, class_tp_curve, dist_pr_curve, visualize_sample
from nuscenes.eval.detection.data_classes import (
DetectionBox,
DetectionConfig,
DetectionMetricDataList,
DetectionMetrics,
)
from nuscenes.eval.detection.render import class_pr_curve, class_tp_curve, dist_pr_curve, summary_plot, visualize_sample
from nuscenes.utils.splits import is_predefined_split


class DetectionEval:
@@ -77,9 +89,16 @@ def __init__(self,
# Load data.
if verbose:
print('Initializing nuScenes detection evaluation')
self.pred_boxes, self.meta = load_prediction(self.result_path, self.cfg.max_boxes_per_sample, DetectionBox,
verbose=verbose)
self.gt_boxes = load_gt(self.nusc, self.eval_set, DetectionBox, verbose=verbose)

if is_predefined_split(split_name=eval_set):
self.pred_boxes, self.meta = load_prediction(self.result_path, self.cfg.max_boxes_per_sample, DetectionBox,
verbose=verbose)
self.gt_boxes = load_gt(self.nusc, self.eval_set, DetectionBox, verbose=verbose)
else:
            sample_tokens_of_custom_split: List[str] = get_samples_of_custom_split(split_name=eval_set, nusc=nusc)
self.pred_boxes, self.meta = load_prediction_of_sample_tokens(self.result_path, self.cfg.max_boxes_per_sample,
DetectionBox, sample_tokens=sample_tokens_of_custom_split, verbose=verbose)
self.gt_boxes = load_gt_of_sample_tokens(nusc, sample_tokens_of_custom_split, DetectionBox, verbose=verbose)

assert set(self.pred_boxes.sample_tokens) == set(self.gt_boxes.sample_tokens), \
"Samples in split doesn't match samples in predictions."
41 changes: 30 additions & 11 deletions python-sdk/nuscenes/eval/detection/tests/test_evaluate.py
@@ -6,28 +6,39 @@
import random
import shutil
import unittest
from typing import Dict
from typing import Dict, List
from unittest.mock import patch

import numpy as np
from tqdm import tqdm

from nuscenes import NuScenes
from nuscenes.eval.common.config import config_factory
from nuscenes.eval.detection.constants import DETECTION_NAMES
from nuscenes.eval.detection.evaluate import DetectionEval
from nuscenes.eval.detection.utils import category_to_detection_name, detection_name_to_rel_attributes
from nuscenes.utils.splits import create_splits_scenes
from nuscenes.utils.splits import get_scenes_of_split
from parameterized import parameterized
from tqdm import tqdm


class TestMain(unittest.TestCase):
res_mockup = 'nusc_eval.json'
res_eval_folder = 'tmp'
splits_file_mockup = 'mocked_splits.json'

def setUp(self):
with open(self.splits_file_mockup, 'w') as f:
json.dump({
"mini_custom_train": ["scene-0061", "scene-0553"],
"mini_custom_val": ["scene-0103", "scene-0916"]
}, f, indent=2)

def tearDown(self):
if os.path.exists(self.res_mockup):
os.remove(self.res_mockup)
if os.path.exists(self.res_eval_folder):
shutil.rmtree(self.res_eval_folder)
if os.path.exists(self.splits_file_mockup):
os.remove(self.splits_file_mockup)

@staticmethod
def _mock_submission(nusc: NuScenes, split: str) -> Dict[str, dict]:
@@ -68,10 +79,10 @@ def random_attr(name: str) -> str:
'use_external': False,
}
mock_results = {}
splits = create_splits_scenes()
        scenes_of_eval_split: List[str] = get_scenes_of_split(split_name=split, nusc=nusc)
val_samples = []
for sample in nusc.sample:
if nusc.get('scene', sample['scene_token'])['name'] in splits[split]:
if nusc.get('scene', sample['scene_token'])['name'] in scenes_of_eval_split:
val_samples.append(sample)

for sample in tqdm(val_samples, leave=False):
@@ -97,23 +108,32 @@ def random_attr(name: str) -> str:
}
return mock_submission

def test_delta(self):


@parameterized.expand([
('mini_val',),
('mini_custom_val',)
])
@patch('nuscenes.utils.splits._get_custom_splits_file_path')
def test_delta(self, eval_split, mock__get_custom_splits_file_path):
"""
        This test runs the evaluation for an arbitrary, random set of predictions.
        The resulting score is captured in this very test, so that if we change the eval code,
        this test will fail if the results change.
"""
mock__get_custom_splits_file_path.return_value = self.splits_file_mockup

random.seed(42)
np.random.seed(42)
assert 'NUSCENES' in os.environ, 'Set NUSCENES env. variable to enable tests.'

nusc = NuScenes(version='v1.0-mini', dataroot=os.environ['NUSCENES'], verbose=False)

with open(self.res_mockup, 'w') as f:
json.dump(self._mock_submission(nusc, 'mini_val'), f, indent=2)
json.dump(self._mock_submission(nusc, eval_split), f, indent=2)

cfg = config_factory('detection_cvpr_2019')
nusc_eval = DetectionEval(nusc, cfg, self.res_mockup, eval_set='mini_val', output_dir=self.res_eval_folder,
nusc_eval = DetectionEval(nusc, cfg, self.res_mockup, eval_set=eval_split, output_dir=self.res_eval_folder,
verbose=False)
metrics, md_list = nusc_eval.evaluate()

@@ -126,9 +146,8 @@ def test_delta(self):
# 7. Score = 0.20237925145690996. After TP reversion bug.
# 8. Score = 0.24047129251302665. After bike racks bug.
# 9. Score = 0.24104572227466886. After bug fix in calc_tp. Include the max recall and exclude the min recall.
# 10. Score = 0.19449091580477748. Changed to use v1.0 mini_val split.
        # 10. Score = 0.19449091580477748. Changed to use v1.0 mini_val split, and the equivalent mini_custom_val split.
self.assertAlmostEqual(metrics.nd_score, 0.19449091580477748)


if __name__ == '__main__':
unittest.main()
25 changes: 21 additions & 4 deletions python-sdk/nuscenes/eval/tracking/evaluate.py
@@ -11,14 +11,23 @@

from nuscenes import NuScenes
from nuscenes.eval.common.config import config_factory
from nuscenes.eval.common.loaders import load_prediction, load_gt, add_center_dist, filter_eval_boxes
from nuscenes.eval.common.loaders import (
add_center_dist,
filter_eval_boxes,
get_samples_of_custom_split,
load_gt,
load_gt_of_sample_tokens,
load_prediction,
load_prediction_of_sample_tokens,
)
from nuscenes.eval.tracking.algo import TrackingEvaluation
from nuscenes.eval.tracking.constants import AVG_METRIC_MAP, MOT_METRIC_MAP, LEGACY_METRICS
from nuscenes.eval.tracking.data_classes import TrackingMetrics, TrackingMetricDataList, TrackingConfig, TrackingBox, \
TrackingMetricData
from nuscenes.eval.tracking.loaders import create_tracks
from nuscenes.eval.tracking.render import recall_metric_curve, summary_plot
from nuscenes.eval.tracking.utils import print_final_metrics
from nuscenes.utils.splits import is_predefined_split


class TrackingEval:
@@ -80,9 +89,17 @@ def __init__(self,
# Load data.
if verbose:
print('Initializing nuScenes tracking evaluation')
pred_boxes, self.meta = load_prediction(self.result_path, self.cfg.max_boxes_per_sample, TrackingBox,
verbose=verbose)
gt_boxes = load_gt(nusc, self.eval_set, TrackingBox, verbose=verbose)

if is_predefined_split(split_name=eval_set):
pred_boxes, self.meta = load_prediction(
self.result_path, self.cfg.max_boxes_per_sample, TrackingBox, verbose=verbose
)
gt_boxes = load_gt(nusc, self.eval_set, TrackingBox, verbose=verbose)
else:
            sample_tokens_of_custom_split: List[str] = get_samples_of_custom_split(split_name=eval_set, nusc=nusc)
pred_boxes, self.meta = load_prediction_of_sample_tokens(self.result_path, self.cfg.max_boxes_per_sample,
TrackingBox, sample_tokens=sample_tokens_of_custom_split, verbose=verbose)
gt_boxes = load_gt_of_sample_tokens(nusc, sample_tokens_of_custom_split, TrackingBox, verbose=verbose)

assert set(pred_boxes.sample_tokens) == set(gt_boxes.sample_tokens), \
"Samples in split don't match samples in predicted tracks."