Skip to content

Commit

Permalink
Merge pull request #144 from compomics/peptide-num-fix
Browse files: browse the repository at this point in the history
Fixes for MS²Rescore report
  • Loading branch information
ArthurDeclercq authored Apr 10, 2024
2 parents fad30de + 162f719 commit 2c55cc2
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 11 deletions.
4 changes: 2 additions & 2 deletions ms2rescore/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ def _fill_missing_precursor_info(psm_list, config):
get_missing_values(psm_list, config, rt_required=rt_required, im_required=im_required)

# Check if values are now present
for value_name in ["retention_time", "ion_mobility"]:
if (
for value_name, required in [("retention_time", rt_required), ("ion_mobility", im_required)]:
if required and (
0.0 in psm_list[value_name]
or None in psm_list[value_name]
or np.isnan(psm_list[value_name]).any()
Expand Down
4 changes: 2 additions & 2 deletions ms2rescore/report/charts.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ def identification_overlap(
return figure

levels = before.levels # ["psms", "peptides", "proteins"] if all available
indexers = ["index", "index", "mokapot protein group"]
indexers = ["index", "peptide", "mokapot protein group"]

overlap_data = defaultdict(dict)
for level, indexer in zip(levels, indexers):
Expand All @@ -386,7 +386,7 @@ def identification_overlap(
set_after = set(df_after[df_after["mokapot q-value"] <= 0.01][indexer])

overlap_data["removed"][level] = -len(set_before - set_after)
overlap_data["retained"][level] = len(set_before | set_after)
overlap_data["retained"][level] = len(set_after.intersection(set_before))
overlap_data["gained"][level] = len(set_after - set_before)

colors = ["#953331", "#316395", "#319545"]
Expand Down
2 changes: 1 addition & 1 deletion ms2rescore/report/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def _get_stats_context(confidence_before, confidence_after):
"item": level_name,
"card_color": card_color,
"number": after,
"diff": f"{after - before:+}",
"diff": f"({after - before:+})",
"percentage": f"{increase:.1f}%",
"is_increase": increase > 0,
"bar_percentage": before / after * 100 if increase > 0 else after / before * 100,
Expand Down
3 changes: 2 additions & 1 deletion ms2rescore/rescoring_engines/mokapot.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from pyteomics.mass import nist_mass

logger = logging.getLogger(__name__)
logging.getLogger("numba").setLevel(logging.WARNING)


def rescore(
Expand Down Expand Up @@ -89,7 +90,7 @@ def rescore(

# Rescore
logger.debug(f"Mokapot brew options: `{kwargs}`")
confidence_results, models = brew(lin_psm_data, **kwargs)
confidence_results, models = brew(lin_psm_data, rng=8, **kwargs)

# Reshape confidence estimates to match PSMList
mokapot_values_targets = (
Expand Down
22 changes: 17 additions & 5 deletions ms2rescore/rescoring_engines/percolator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
import logging
import subprocess
from typing import Any, Dict, Optional
from copy import deepcopy

import numpy as np
import psm_utils

from ms2rescore.exceptions import MS2RescoreError
Expand Down Expand Up @@ -103,8 +103,15 @@ def rescore(
# Need to be able to link back to original PSMs, so reindex spectrum IDs, but copy PSM list
# to avoid modifying original...
# TODO: Better approach for this?
psm_list_reindexed = psm_list.copy()
psm_list_reindexed["spectrum_id"] = np.arange(len(psm_list_reindexed))

psm_list_reindexed = deepcopy(psm_list)
psm_list_reindexed.set_ranks()
psm_list_reindexed["spectrum_id"] = [
f"{psm.get_usi(as_url=False)}_{psm.rank}" for psm in psm_list_reindexed
]
spectrum_id_index = {
spectrum_id: index for index, spectrum_id in enumerate(psm_list_reindexed["spectrum_id"])
}

_write_pin_file(psm_list_reindexed, pin_filepath)

Expand Down Expand Up @@ -134,10 +141,13 @@ def rescore(
psm_list,
percolator_kwargs["results-psms"],
percolator_kwargs["decoy-results-psms"],
spectrum_id_index,
)


def _update_psm_scores(psm_list: psm_utils.PSMList, target_pout: str, decoy_pout: str):
def _update_psm_scores(
psm_list: psm_utils.PSMList, target_pout: str, decoy_pout: str, spectrum_id_index: list
):
"""
Update PSM scores with Percolator results.
Expand All @@ -150,7 +160,9 @@ def _update_psm_scores(psm_list: psm_utils.PSMList, target_pout: str, decoy_pout
psm_list_percolator = psm_utils.PSMList(psm_list=target_psms.psm_list + decoy_psms.psm_list)

# Sort by reindexed spectrum_id so order matches original PSM list
psm_list_percolator[np.argsort(psm_list_percolator["spectrum_id"])]
psm_list_percolator = sorted(
psm_list_percolator, key=lambda psm: spectrum_id_index[psm["spectrum_id"]]
)

if not len(psm_list) == len(psm_list_percolator):
raise MS2RescoreError(
Expand Down

0 comments on commit 2c55cc2

Please sign in to comment.