From 65d4e2e20ce6cc6fadf78e54298602abe20b06aa Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Fri, 29 Mar 2024 16:00:47 +0100 Subject: [PATCH 01/10] set peptide as column for set instead of index to see overlap --- ms2rescore/report/charts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ms2rescore/report/charts.py b/ms2rescore/report/charts.py index e601d826..93fdd4ff 100644 --- a/ms2rescore/report/charts.py +++ b/ms2rescore/report/charts.py @@ -349,7 +349,7 @@ def identification_overlap( """ levels = before.levels # ["psms", "peptides", "proteins"] if all available - indexers = ["index", "index", "mokapot protein group"] + indexers = ["index", "peptide", "mokapot protein group"] overlap_data = defaultdict(dict) for level, indexer in zip(levels, indexers): From e873e7325e86e94b28b296f4beb370efe9e7ba18 Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Tue, 9 Apr 2024 16:48:59 +0200 Subject: [PATCH 02/10] fix percolator issue with report --- ms2rescore/rescoring_engines/percolator.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/ms2rescore/rescoring_engines/percolator.py b/ms2rescore/rescoring_engines/percolator.py index 5f7d4e5d..eb2a1d20 100644 --- a/ms2rescore/rescoring_engines/percolator.py +++ b/ms2rescore/rescoring_engines/percolator.py @@ -20,6 +20,7 @@ import logging import subprocess from typing import Any, Dict, Optional +from copy import deepcopy import numpy as np import psm_utils @@ -103,8 +104,15 @@ def rescore( # Need to be able to link back to original PSMs, so reindex spectrum IDs, but copy PSM list # to avoid modifying original... # TODO: Better approach for this? - psm_list_reindexed = psm_list.copy() - psm_list_reindexed["spectrum_id"] = np.arange(len(psm_list_reindexed)) + + psm_list_reindexed = deepcopy(psm_list) + psm_list_reindexed.set_ranks() + psm_list_reindexed["spectrum_id"] = [ + f"{psm.get_usi(as_url=False)}_{psm.rank}" for psm in psm_list_reindexed + ] + spectrum_id_index = { + spectrum_id: index for index, spectrum_id in enumerate(psm_list_reindexed["spectrum_id"]) + } _write_pin_file(psm_list_reindexed, pin_filepath) @@ -134,10 +142,13 @@ def rescore( psm_list, percolator_kwargs["results-psms"], percolator_kwargs["decoy-results-psms"], + spectrum_id_index, ) -def _update_psm_scores(psm_list: psm_utils.PSMList, target_pout: str, decoy_pout: str): +def _update_psm_scores( + psm_list: psm_utils.PSMList, target_pout: str, decoy_pout: str, spectrum_id_index: list +): """ Update PSM scores with Percolator results. @@ -150,7 +161,9 @@ def _update_psm_scores(psm_list: psm_utils.PSMList, target_pout: str, decoy_pout psm_list_percolator = psm_utils.PSMList(psm_list=target_psms.psm_list + decoy_psms.psm_list) # Sort by reindexed spectrum_id so order matches original PSM list - psm_list_percolator[np.argsort(psm_list_percolator["spectrum_id"])] + psm_list_percolator = sorted( + psm_list_percolator, key=lambda psm: spectrum_id_index[psm["spectrum_id"]] + ) if not len(psm_list) == len(psm_list_percolator): raise MS2RescoreError( From dec5950251d15c7e6c0d67e7c0653ece317fba1a Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Tue, 9 Apr 2024 16:49:21 +0200 Subject: [PATCH 03/10] ionmob fix --- ms2rescore/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ms2rescore/core.py b/ms2rescore/core.py index 4ec44810..f788a5f0 100644 --- a/ms2rescore/core.py +++ b/ms2rescore/core.py @@ -62,9 +62,9 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None: rt_required = ("deeplc" in config["feature_generators"]) and ( None in psm_list["retention_time"] ) - im_required = ("ionmob" or "im2deep" in config["feature_generators"]) and ( - None in psm_list["ion_mobility"] - ) + im_required = ( + "ionmob" in config["feature_generators"] or "im2deep" in config["feature_generators"] + ) and (None in psm_list["ion_mobility"]) logger.debug(f"RT required: {rt_required}, IM required: {im_required}") if rt_required or im_required: From 09fbbfb0d4a4485d9a0ed6b49324f8c8305288df Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Tue, 9 Apr 2024 16:49:32 +0200 Subject: [PATCH 04/10] set logging of numba --- ms2rescore/rescoring_engines/mokapot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ms2rescore/rescoring_engines/mokapot.py b/ms2rescore/rescoring_engines/mokapot.py index cc7a336f..515ab4bc 100644 --- a/ms2rescore/rescoring_engines/mokapot.py +++ b/ms2rescore/rescoring_engines/mokapot.py @@ -31,6 +31,7 @@ from pyteomics.mass import nist_mass logger = logging.getLogger(__name__) +logging.getLogger("numba").setLevel(logging.WARNING) def rescore( From 6efddff6337811b0881e5d6d5de6eda0cfe10cda Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Tue, 9 Apr 2024 17:06:35 +0200 Subject: [PATCH 05/10] remove unused import --- ms2rescore/rescoring_engines/percolator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ms2rescore/rescoring_engines/percolator.py b/ms2rescore/rescoring_engines/percolator.py index eb2a1d20..c6ea3d3f 100644 --- a/ms2rescore/rescoring_engines/percolator.py +++ b/ms2rescore/rescoring_engines/percolator.py @@ -22,7 +22,6 @@ from typing import Any, Dict, Optional from copy import deepcopy -import numpy as np import psm_utils from ms2rescore.exceptions import MS2RescoreError From d1c6388bb36936fcbab1e47eb6855cf1b9a8b78f Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Wed, 10 Apr 2024 11:27:17 +0200 Subject: [PATCH 06/10] add default random num generator seed --- ms2rescore/rescoring_engines/mokapot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ms2rescore/rescoring_engines/mokapot.py b/ms2rescore/rescoring_engines/mokapot.py index 515ab4bc..4638247d 100644 --- a/ms2rescore/rescoring_engines/mokapot.py +++ b/ms2rescore/rescoring_engines/mokapot.py @@ -90,7 +90,7 @@ def rescore( # Rescore logger.debug(f"Mokapot brew options: `{kwargs}`") - confidence_results, models = brew(lin_psm_data, **kwargs) + confidence_results, models = brew(lin_psm_data, rng=8, **kwargs) # Reshape confidence estimates to match PSMList mokapot_values_targets = ( From 1e11b4bdff619dab8fe47032042989e2580cad42 Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Wed, 10 Apr 2024 11:28:18 +0200 Subject: [PATCH 07/10] changes to report --- ms2rescore/report/charts.py | 2 +- ms2rescore/report/generate.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ms2rescore/report/charts.py b/ms2rescore/report/charts.py index 92f63c8a..8d802b8b 100644 --- a/ms2rescore/report/charts.py +++ b/ms2rescore/report/charts.py @@ -386,7 +386,7 @@ def identification_overlap( set_after = set(df_after[df_after["mokapot q-value"] <= 0.01][indexer]) overlap_data["removed"][level] = -len(set_before - set_after) - overlap_data["retained"][level] = len(set_before | set_after) + overlap_data["retained"][level] = len(set_after.intersection(set_before)) overlap_data["gained"][level] = len(set_after - set_before) colors = ["#953331", "#316395", "#319545"] diff --git a/ms2rescore/report/generate.py b/ms2rescore/report/generate.py index f5d24768..400fb158 100644 --- a/ms2rescore/report/generate.py +++ b/ms2rescore/report/generate.py @@ -180,12 +180,13 @@ def _get_stats_context(confidence_before, confidence_after): if not before or not after: continue increase = (after - before) / before * 100 + diff = after - before stats.append( { "item": level_name, "card_color": card_color, "number": after, - "diff": f"{after - before:+}", + "diff": f"({diff:+})", "percentage": f"{increase:.1f}%", "is_increase": increase > 0, "bar_percentage": before / after * 100 if increase > 0 else after / before * 100, From b144d5fb58421c7e950bfa2daf94ed431064008b Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Wed, 10 Apr 2024 11:30:11 +0200 Subject: [PATCH 08/10] fix _fill_missing_precursor_info check --- ms2rescore/core.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ms2rescore/core.py b/ms2rescore/core.py index f63fe982..40ab46aa 100644 --- a/ms2rescore/core.py +++ b/ms2rescore/core.py @@ -175,7 +175,11 @@ def _fill_missing_precursor_info(psm_list, config): get_missing_values(psm_list, config, rt_required=rt_required, im_required=im_required) # Check if values are now present - for value_name in ["retention_time", "ion_mobility"]: + for value_name, required in zip( + ["retention_time", "ion_mobility"], [rt_required, im_required] + ): + if not required: + continue if ( 0.0 in psm_list[value_name] or None in psm_list[value_name] From 1999039d55ad0bfb9178431dfbeb386ac0121a75 Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Wed, 10 Apr 2024 13:42:53 +0200 Subject: [PATCH 09/10] Apply suggestions from code review :recycle: :recycle: Co-authored-by: Ralf Gabriels --- ms2rescore/core.py | 8 ++------ ms2rescore/report/generate.py | 3 +-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/ms2rescore/core.py b/ms2rescore/core.py index 40ab46aa..2b25d7d8 100644 --- a/ms2rescore/core.py +++ b/ms2rescore/core.py @@ -175,12 +175,8 @@ def _fill_missing_precursor_info(psm_list, config): get_missing_values(psm_list, config, rt_required=rt_required, im_required=im_required) # Check if values are now present - for value_name, required in zip( - ["retention_time", "ion_mobility"], [rt_required, im_required] - ): - if not required: - continue - if ( + for value_name, required in [("retention_time", rt_required), ("ion_mobility", im_required)]): + if required and ( 0.0 in psm_list[value_name] or None in psm_list[value_name] or np.isnan(psm_list[value_name]).any() diff --git a/ms2rescore/report/generate.py b/ms2rescore/report/generate.py index 400fb158..d74399e4 100644 --- a/ms2rescore/report/generate.py +++ b/ms2rescore/report/generate.py @@ -180,13 +180,12 @@ def _get_stats_context(confidence_before, confidence_after): if not before or not after: continue increase = (after - before) / before * 100 - diff = after - before stats.append( { "item": level_name, "card_color": card_color, "number": after, - "diff": f"({diff:+})", + "diff": f"({after - before:+})", "percentage": f"{increase:.1f}%", "is_increase": increase > 0, "bar_percentage": before / after * 100 if increase > 0 else after / before * 100, From 162f7199bd6d3b2bac51c2ce4441dbff2a80e9e6 Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Wed, 10 Apr 2024 13:46:47 +0200 Subject: [PATCH 10/10] fixed Ralfs suggestions --- ms2rescore/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ms2rescore/core.py b/ms2rescore/core.py index 2b25d7d8..4690ac91 100644 --- a/ms2rescore/core.py +++ b/ms2rescore/core.py @@ -175,7 +175,7 @@ def _fill_missing_precursor_info(psm_list, config): get_missing_values(psm_list, config, rt_required=rt_required, im_required=im_required) # Check if values are now present - for value_name, required in [("retention_time", rt_required), ("ion_mobility", im_required)]): + for value_name, required in [("retention_time", rt_required), ("ion_mobility", im_required)]: if required and ( 0.0 in psm_list[value_name] or None in psm_list[value_name]