diff --git a/ms2rescore/core.py b/ms2rescore/core.py index f63fe98..4690ac9 100644 --- a/ms2rescore/core.py +++ b/ms2rescore/core.py @@ -175,8 +175,8 @@ def _fill_missing_precursor_info(psm_list, config): get_missing_values(psm_list, config, rt_required=rt_required, im_required=im_required) # Check if values are now present - for value_name in ["retention_time", "ion_mobility"]: - if ( + for value_name, required in [("retention_time", rt_required), ("ion_mobility", im_required)]: + if required and ( 0.0 in psm_list[value_name] or None in psm_list[value_name] or np.isnan(psm_list[value_name]).any() diff --git a/ms2rescore/report/charts.py b/ms2rescore/report/charts.py index 48d543a..8d802b8 100644 --- a/ms2rescore/report/charts.py +++ b/ms2rescore/report/charts.py @@ -373,7 +373,7 @@ def identification_overlap( return figure levels = before.levels # ["psms", "peptides", "proteins"] if all available - indexers = ["index", "index", "mokapot protein group"] + indexers = ["index", "peptide", "mokapot protein group"] overlap_data = defaultdict(dict) for level, indexer in zip(levels, indexers): @@ -386,7 +386,7 @@ def identification_overlap( set_after = set(df_after[df_after["mokapot q-value"] <= 0.01][indexer]) overlap_data["removed"][level] = -len(set_before - set_after) - overlap_data["retained"][level] = len(set_before | set_after) + overlap_data["retained"][level] = len(set_after.intersection(set_before)) overlap_data["gained"][level] = len(set_after - set_before) colors = ["#953331", "#316395", "#319545"] diff --git a/ms2rescore/report/generate.py b/ms2rescore/report/generate.py index f5d2476..d74399e 100644 --- a/ms2rescore/report/generate.py +++ b/ms2rescore/report/generate.py @@ -185,7 +185,7 @@ def _get_stats_context(confidence_before, confidence_after): "item": level_name, "card_color": card_color, "number": after, - "diff": f"{after - before:+}", + "diff": f"({after - before:+})", "percentage": f"{increase:.1f}%", "is_increase": increase > 0, "bar_percentage": before / after * 100 if increase > 0 else after / before * 100, diff --git a/ms2rescore/rescoring_engines/mokapot.py b/ms2rescore/rescoring_engines/mokapot.py index cc7a336..4638247 100644 --- a/ms2rescore/rescoring_engines/mokapot.py +++ b/ms2rescore/rescoring_engines/mokapot.py @@ -31,6 +31,7 @@ from pyteomics.mass import nist_mass logger = logging.getLogger(__name__) +logging.getLogger("numba").setLevel(logging.WARNING) def rescore( @@ -89,7 +90,7 @@ def rescore( # Rescore logger.debug(f"Mokapot brew options: `{kwargs}`") - confidence_results, models = brew(lin_psm_data, **kwargs) + confidence_results, models = brew(lin_psm_data, rng=8, **kwargs) # Reshape confidence estimates to match PSMList mokapot_values_targets = ( diff --git a/ms2rescore/rescoring_engines/percolator.py b/ms2rescore/rescoring_engines/percolator.py index 5f7d4e5..c6ea3d3 100644 --- a/ms2rescore/rescoring_engines/percolator.py +++ b/ms2rescore/rescoring_engines/percolator.py @@ -20,8 +20,8 @@ import logging import subprocess from typing import Any, Dict, Optional +from copy import deepcopy -import numpy as np import psm_utils from ms2rescore.exceptions import MS2RescoreError @@ -103,8 +103,15 @@ def rescore( # Need to be able to link back to original PSMs, so reindex spectrum IDs, but copy PSM list # to avoid modifying original... # TODO: Better approach for this? - psm_list_reindexed = psm_list.copy() - psm_list_reindexed["spectrum_id"] = np.arange(len(psm_list_reindexed)) + + psm_list_reindexed = deepcopy(psm_list) + psm_list_reindexed.set_ranks() + psm_list_reindexed["spectrum_id"] = [ + f"{psm.get_usi(as_url=False)}_{psm.rank}" for psm in psm_list_reindexed + ] + spectrum_id_index = { + spectrum_id: index for index, spectrum_id in enumerate(psm_list_reindexed["spectrum_id"]) + } _write_pin_file(psm_list_reindexed, pin_filepath) @@ -134,10 +141,13 @@ def rescore( psm_list, percolator_kwargs["results-psms"], percolator_kwargs["decoy-results-psms"], + spectrum_id_index, ) -def _update_psm_scores(psm_list: psm_utils.PSMList, target_pout: str, decoy_pout: str): +def _update_psm_scores( + psm_list: psm_utils.PSMList, target_pout: str, decoy_pout: str, spectrum_id_index: list +): """ Update PSM scores with Percolator results. @@ -150,7 +160,9 @@ def _update_psm_scores(psm_list: psm_utils.PSMList, target_pout: str, decoy_pout psm_list_percolator = psm_utils.PSMList(psm_list=target_psms.psm_list + decoy_psms.psm_list) # Sort by reindexed spectrum_id so order matches original PSM list - psm_list_percolator[np.argsort(psm_list_percolator["spectrum_id"])] + psm_list_percolator = sorted( + psm_list_percolator, key=lambda psm: spectrum_id_index[psm["spectrum_id"]] + ) if not len(psm_list) == len(psm_list_percolator): raise MS2RescoreError(