Allow defining of custom splits in a splits.json file (#1046)
* Enable getting custom splits from splits.json file

* Enable custom splits.json in more parts of the devkit

There are still some `create_splits_scenes` call sites left that do not support custom splits.

* Enable filtering the prediction/results by a (custom) split

Previously, only the GT was filtered by split, and the results file was expected to contain exactly the samples of one specific split.

* Keep original load_prediction and load_gt

* restore state from master for panoptic and lidarseg

* mock splits.json file in detection and tracking eval unit tests

* tidy up unit tests

* implement PR feedback comments
michael-hoss authored Apr 2, 2024
1 parent 1b03e7d commit 4df2701
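
In practice, a custom split is just a JSON file mapping split names to lists of scene names. Below is a minimal sketch of creating one; the schema is inferred from the mocked splits file in the unit tests further down, and where the devkit looks this file up is resolved by `nuscenes.utils.splits._get_custom_splits_file_path`, which is not shown in this diff.

```python
import json

# Hypothetical splits.json content. The schema (split name -> list of scene
# names) mirrors the mocked splits file in the detection unit tests below.
custom_splits = {
    "mini_custom_train": ["scene-0061", "scene-0553"],
    "mini_custom_val": ["scene-0103", "scene-0916"],
}

with open("splits.json", "w") as f:
    json.dump(custom_splits, f, indent=2)
```

The evaluators then dispatch between predefined and custom splits via `is_predefined_split`, as the diffs below show.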
Showing 8 changed files with 325 additions and 44 deletions.
163 changes: 159 additions & 4 deletions python-sdk/nuscenes/eval/common/loaders.py
@@ -2,20 +2,19 @@
# Code written by Oscar Beijbom, 2019.

import json
from typing import Dict, Tuple
from typing import Dict, List, Tuple

import numpy as np
import tqdm
from pyquaternion import Quaternion

from nuscenes import NuScenes
from nuscenes.eval.common.data_classes import EvalBoxes
from nuscenes.eval.detection.data_classes import DetectionBox
from nuscenes.eval.detection.utils import category_to_detection_name
from nuscenes.eval.tracking.data_classes import TrackingBox
from nuscenes.utils.data_classes import Box
from nuscenes.utils.geometry_utils import points_in_box
from nuscenes.utils.splits import create_splits_scenes
from nuscenes.utils.splits import create_splits_scenes, get_scenes_of_custom_split
from pyquaternion import Quaternion


def load_prediction(result_path: str, max_boxes_per_sample: int, box_cls, verbose: bool = False) \
@@ -283,3 +282,159 @@ def _get_box_class_field(eval_boxes: EvalBoxes) -> str:
raise Exception('Error: Invalid box type: %s' % box)

return class_field

def load_prediction_of_sample_tokens(result_path: str, max_boxes_per_sample: int, box_cls,
sample_tokens: List[str], verbose: bool = False) \
-> Tuple[EvalBoxes, Dict]:
"""
Loads object predictions from file.
:param result_path: Path to the .json result file provided by the user.
    :param max_boxes_per_sample: Maximum number of boxes allowed per sample.
    :param box_cls: Type of box to load, e.g. DetectionBox or TrackingBox.
    :param sample_tokens: The sample tokens to load predictions for.
    :param verbose: Whether to print messages to stdout.
:return: The deserialized results and meta data.
"""

# Load from file and check that the format is correct.
with open(result_path) as f:
data = json.load(f)
    assert 'results' in data, 'Error: No field `results` in result file. Please note that the result format changed. ' \
                              'See https://www.nuscenes.org/object-detection for more information.'
assert isinstance(data['results'], dict), 'Error: results must be a dict.'

# Filter by sample tokens.
    results_of_split: dict = {sample_token: data['results'][sample_token] for sample_token in sample_tokens}

# Deserialize results and get meta data.
    boxes_of_split: EvalBoxes = EvalBoxes.deserialize(results_of_split, box_cls)
meta = data['meta']
if verbose:
print("Loaded results from {}. Found detections for {} samples."
.format(result_path, len(boxes_of_split.sample_tokens)))

    # Check that each sample has no more than max_boxes_per_sample predicted boxes.
for sample_token in boxes_of_split.sample_tokens:
assert len(boxes_of_split.boxes[sample_token]) <= max_boxes_per_sample, \
"Error: Only <= %d boxes per sample allowed!" % max_boxes_per_sample

return boxes_of_split, meta


def load_gt_of_sample_tokens(nusc: NuScenes, sample_tokens: List[str], box_cls,
verbose: bool = False) -> EvalBoxes:
"""
Loads ground truth boxes from DB.
:param nusc: A NuScenes instance.
    :param sample_tokens: The sample tokens for which to load GT boxes.
:param box_cls: Type of box to load, e.g. DetectionBox or TrackingBox.
:param verbose: Whether to print messages to stdout.
:return: The GT boxes.
"""
# Init.
if box_cls == DetectionBox:
attribute_map = {a['token']: a['name'] for a in nusc.attribute}

all_annotations = EvalBoxes()

    # Load annotations and filter out unused labels.
tracking_id_set = set()
for sample_token in tqdm.tqdm(sample_tokens, leave=verbose):

sample = nusc.get('sample', sample_token)
sample_annotation_tokens = sample['anns']

sample_boxes = []
for sample_annotation_token in sample_annotation_tokens:

sample_annotation = nusc.get('sample_annotation', sample_annotation_token)
if box_cls == DetectionBox:
# Get label name in detection task and filter unused labels.
detection_name = category_to_detection_name(sample_annotation['category_name'])
if detection_name is None:
continue

# Get attribute_name.
attr_tokens = sample_annotation['attribute_tokens']
attr_count = len(attr_tokens)
if attr_count == 0:
attribute_name = ''
elif attr_count == 1:
attribute_name = attribute_map[attr_tokens[0]]
else:
raise Exception('Error: GT annotations must not have more than one attribute!')

sample_boxes.append(
box_cls(
sample_token=sample_token,
translation=sample_annotation['translation'],
size=sample_annotation['size'],
rotation=sample_annotation['rotation'],
velocity=nusc.box_velocity(sample_annotation['token'])[:2],
num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'],
detection_name=detection_name,
detection_score=-1.0, # GT samples do not have a score.
attribute_name=attribute_name
)
)
elif box_cls == TrackingBox:
# Use nuScenes token as tracking id.
tracking_id = sample_annotation['instance_token']
tracking_id_set.add(tracking_id)

# Get label name in detection task and filter unused labels.
# Import locally to avoid errors when motmetrics package is not installed.
from nuscenes.eval.tracking.utils import category_to_tracking_name
tracking_name = category_to_tracking_name(sample_annotation['category_name'])
if tracking_name is None:
continue

sample_boxes.append(
box_cls(
sample_token=sample_token,
translation=sample_annotation['translation'],
size=sample_annotation['size'],
rotation=sample_annotation['rotation'],
velocity=nusc.box_velocity(sample_annotation['token'])[:2],
num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'],
tracking_id=tracking_id,
tracking_name=tracking_name,
tracking_score=-1.0 # GT samples do not have a score.
)
)
else:
raise NotImplementedError('Error: Invalid box_cls %s!' % box_cls)

all_annotations.add_boxes(sample_token, sample_boxes)

if verbose:
print("Loaded ground truth annotations for {} samples.".format(len(all_annotations.sample_tokens)))

return all_annotations

def get_samples_of_custom_split(split_name: str, nusc : NuScenes) -> List[str]:
"""
Returns the sample tokens of a custom/user-defined split.
:param split_name: The name of the custom split.
:param nusc: The NuScenes instance.
:return: The sample tokens of the custom split.
"""

    scenes_of_split: List[str] = get_scenes_of_custom_split(split_name=split_name, nusc=nusc)
    sample_tokens_of_split: List[str] = get_samples_of_scenes(scene_names=scenes_of_split, nusc=nusc)
return sample_tokens_of_split

def get_samples_of_scenes(scene_names: List[str], nusc: NuScenes) -> List[str]:
"""Given a list of scene names, returns the sample tokens of these scenes."""

all_sample_tokens = [s['token'] for s in nusc.sample]
assert len(all_sample_tokens) > 0, "Error: Database has no samples!"

    filtered_sample_tokens: List[str] = []
for sample_token in all_sample_tokens:
scene_token = nusc.get('sample', sample_token)['scene_token']
scene_record = nusc.get('scene', scene_token)
if scene_record['name'] in scene_names:
filtered_sample_tokens.append(sample_token)
return filtered_sample_tokens
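
The new loader helpers compose as follows. A usage sketch: the dataroot, result path, and the box limit of 500 are placeholder assumptions, while the function signatures match the additions above.

```python
from nuscenes import NuScenes
from nuscenes.eval.common.loaders import (
    get_samples_of_custom_split,
    load_gt_of_sample_tokens,
    load_prediction_of_sample_tokens,
)
from nuscenes.eval.detection.data_classes import DetectionBox

# Placeholder dataroot; adjust to your local nuScenes installation.
nusc = NuScenes(version='v1.0-mini', dataroot='/data/nuscenes', verbose=False)

# Resolve the custom split (defined in splits.json) to its sample tokens ...
sample_tokens = get_samples_of_custom_split(split_name='mini_custom_val', nusc=nusc)

# ... then load predictions and ground truth for exactly those samples.
pred_boxes, meta = load_prediction_of_sample_tokens(
    'results.json', max_boxes_per_sample=500, box_cls=DetectionBox,
    sample_tokens=sample_tokens, verbose=True)
gt_boxes = load_gt_of_sample_tokens(nusc, sample_tokens, DetectionBox, verbose=True)
```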
37 changes: 28 additions & 9 deletions python-sdk/nuscenes/eval/detection/evaluate.py
@@ -6,19 +6,31 @@
import os
import random
import time
from typing import Tuple, Dict, Any
from typing import Any, Dict, List, Tuple

import numpy as np

from nuscenes import NuScenes
from nuscenes.eval.common.config import config_factory
from nuscenes.eval.common.data_classes import EvalBoxes
from nuscenes.eval.common.loaders import load_prediction, load_gt, add_center_dist, filter_eval_boxes
from nuscenes.eval.common.loaders import (
add_center_dist,
filter_eval_boxes,
get_samples_of_custom_split,
load_gt,
load_gt_of_sample_tokens,
load_prediction,
load_prediction_of_sample_tokens,
)
from nuscenes.eval.detection.algo import accumulate, calc_ap, calc_tp
from nuscenes.eval.detection.constants import TP_METRICS
from nuscenes.eval.detection.data_classes import DetectionConfig, DetectionMetrics, DetectionBox, \
DetectionMetricDataList
from nuscenes.eval.detection.render import summary_plot, class_pr_curve, class_tp_curve, dist_pr_curve, visualize_sample
from nuscenes.eval.detection.data_classes import (
DetectionBox,
DetectionConfig,
DetectionMetricDataList,
DetectionMetrics,
)
from nuscenes.eval.detection.render import class_pr_curve, class_tp_curve, dist_pr_curve, summary_plot, visualize_sample
from nuscenes.utils.splits import is_predefined_split


class DetectionEval:
@@ -77,9 +89,16 @@ def __init__(self,
# Load data.
if verbose:
print('Initializing nuScenes detection evaluation')
self.pred_boxes, self.meta = load_prediction(self.result_path, self.cfg.max_boxes_per_sample, DetectionBox,
verbose=verbose)
self.gt_boxes = load_gt(self.nusc, self.eval_set, DetectionBox, verbose=verbose)

if is_predefined_split(split_name=eval_set):
self.pred_boxes, self.meta = load_prediction(self.result_path, self.cfg.max_boxes_per_sample, DetectionBox,
verbose=verbose)
self.gt_boxes = load_gt(self.nusc, self.eval_set, DetectionBox, verbose=verbose)
else:
            sample_tokens_of_custom_split: List[str] = get_samples_of_custom_split(split_name=eval_set, nusc=nusc)
self.pred_boxes, self.meta = load_prediction_of_sample_tokens(self.result_path, self.cfg.max_boxes_per_sample,
DetectionBox, sample_tokens=sample_tokens_of_custom_split, verbose=verbose)
self.gt_boxes = load_gt_of_sample_tokens(nusc, sample_tokens_of_custom_split, DetectionBox, verbose=verbose)

assert set(self.pred_boxes.sample_tokens) == set(self.gt_boxes.sample_tokens), \
"Samples in split doesn't match samples in predictions."
41 changes: 30 additions & 11 deletions python-sdk/nuscenes/eval/detection/tests/test_evaluate.py
@@ -6,28 +6,39 @@
import random
import shutil
import unittest
from typing import Dict
from typing import Dict, List
from unittest.mock import patch

import numpy as np
from tqdm import tqdm

from nuscenes import NuScenes
from nuscenes.eval.common.config import config_factory
from nuscenes.eval.detection.constants import DETECTION_NAMES
from nuscenes.eval.detection.evaluate import DetectionEval
from nuscenes.eval.detection.utils import category_to_detection_name, detection_name_to_rel_attributes
from nuscenes.utils.splits import create_splits_scenes
from nuscenes.utils.splits import get_scenes_of_split
from parameterized import parameterized
from tqdm import tqdm


class TestMain(unittest.TestCase):
res_mockup = 'nusc_eval.json'
res_eval_folder = 'tmp'
splits_file_mockup = 'mocked_splits.json'

def setUp(self):
with open(self.splits_file_mockup, 'w') as f:
json.dump({
"mini_custom_train": ["scene-0061", "scene-0553"],
"mini_custom_val": ["scene-0103", "scene-0916"]
}, f, indent=2)

def tearDown(self):
if os.path.exists(self.res_mockup):
os.remove(self.res_mockup)
if os.path.exists(self.res_eval_folder):
shutil.rmtree(self.res_eval_folder)
if os.path.exists(self.splits_file_mockup):
os.remove(self.splits_file_mockup)

@staticmethod
def _mock_submission(nusc: NuScenes, split: str) -> Dict[str, dict]:
@@ -68,10 +79,10 @@ def random_attr(name: str) -> str:
'use_external': False,
}
mock_results = {}
splits = create_splits_scenes()
        scenes_of_eval_split: List[str] = get_scenes_of_split(split_name=split, nusc=nusc)
val_samples = []
for sample in nusc.sample:
if nusc.get('scene', sample['scene_token'])['name'] in splits[split]:
if nusc.get('scene', sample['scene_token'])['name'] in scenes_of_eval_split:
val_samples.append(sample)

for sample in tqdm(val_samples, leave=False):
@@ -97,23 +108,32 @@ def random_attr(name: str) -> str:
}
return mock_submission

def test_delta(self):


@parameterized.expand([
('mini_val',),
('mini_custom_val',)
])
@patch('nuscenes.utils.splits._get_custom_splits_file_path')
def test_delta(self, eval_split, mock__get_custom_splits_file_path):
"""
        This test runs the evaluation for an arbitrary, random set of predictions.
        The resulting score is captured in this very test, so that if we change the eval code,
        this test will fail if the results change.
"""
mock__get_custom_splits_file_path.return_value = self.splits_file_mockup

random.seed(42)
np.random.seed(42)
assert 'NUSCENES' in os.environ, 'Set NUSCENES env. variable to enable tests.'

nusc = NuScenes(version='v1.0-mini', dataroot=os.environ['NUSCENES'], verbose=False)

with open(self.res_mockup, 'w') as f:
json.dump(self._mock_submission(nusc, 'mini_val'), f, indent=2)
json.dump(self._mock_submission(nusc, eval_split), f, indent=2)

cfg = config_factory('detection_cvpr_2019')
nusc_eval = DetectionEval(nusc, cfg, self.res_mockup, eval_set='mini_val', output_dir=self.res_eval_folder,
nusc_eval = DetectionEval(nusc, cfg, self.res_mockup, eval_set=eval_split, output_dir=self.res_eval_folder,
verbose=False)
metrics, md_list = nusc_eval.evaluate()

@@ -126,9 +146,8 @@ def test_delta(self):
# 7. Score = 0.20237925145690996. After TP reversion bug.
# 8. Score = 0.24047129251302665. After bike racks bug.
# 9. Score = 0.24104572227466886. After bug fix in calc_tp. Include the max recall and exclude the min recall.
# 10. Score = 0.19449091580477748. Changed to use v1.0 mini_val split.
        # 10. Score = 0.19449091580477748. Changed to use v1.0 mini_val split, and the equivalent mini_custom_val split.
self.assertAlmostEqual(metrics.nd_score, 0.19449091580477748)


if __name__ == '__main__':
unittest.main()
25 changes: 21 additions & 4 deletions python-sdk/nuscenes/eval/tracking/evaluate.py
@@ -11,14 +11,23 @@

from nuscenes import NuScenes
from nuscenes.eval.common.config import config_factory
from nuscenes.eval.common.loaders import load_prediction, load_gt, add_center_dist, filter_eval_boxes
from nuscenes.eval.common.loaders import (
add_center_dist,
filter_eval_boxes,
get_samples_of_custom_split,
load_gt,
load_gt_of_sample_tokens,
load_prediction,
load_prediction_of_sample_tokens,
)
from nuscenes.eval.tracking.algo import TrackingEvaluation
from nuscenes.eval.tracking.constants import AVG_METRIC_MAP, MOT_METRIC_MAP, LEGACY_METRICS
from nuscenes.eval.tracking.data_classes import TrackingMetrics, TrackingMetricDataList, TrackingConfig, TrackingBox, \
TrackingMetricData
from nuscenes.eval.tracking.loaders import create_tracks
from nuscenes.eval.tracking.render import recall_metric_curve, summary_plot
from nuscenes.eval.tracking.utils import print_final_metrics
from nuscenes.utils.splits import is_predefined_split


class TrackingEval:
@@ -80,9 +89,17 @@ def __init__(self,
# Load data.
if verbose:
print('Initializing nuScenes tracking evaluation')
pred_boxes, self.meta = load_prediction(self.result_path, self.cfg.max_boxes_per_sample, TrackingBox,
verbose=verbose)
gt_boxes = load_gt(nusc, self.eval_set, TrackingBox, verbose=verbose)

if is_predefined_split(split_name=eval_set):
pred_boxes, self.meta = load_prediction(
self.result_path, self.cfg.max_boxes_per_sample, TrackingBox, verbose=verbose
)
gt_boxes = load_gt(nusc, self.eval_set, TrackingBox, verbose=verbose)
else:
            sample_tokens_of_custom_split: List[str] = get_samples_of_custom_split(split_name=eval_set, nusc=nusc)
pred_boxes, self.meta = load_prediction_of_sample_tokens(self.result_path, self.cfg.max_boxes_per_sample,
TrackingBox, sample_tokens=sample_tokens_of_custom_split, verbose=verbose)
gt_boxes = load_gt_of_sample_tokens(nusc, sample_tokens_of_custom_split, TrackingBox, verbose=verbose)

assert set(pred_boxes.sample_tokens) == set(gt_boxes.sample_tokens), \
"Samples in split don't match samples in predicted tracks."