From c57a2f6b482b029a6cbcab4b1d1510e03424b519 Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Thu, 31 Aug 2023 16:02:55 +0200 Subject: [PATCH 01/18] ionmob feature generator --- ms2rescore/feature_generators/ionmob.py | 289 ++++++++++++++++++++++++ 1 file changed, 289 insertions(+) create mode 100644 ms2rescore/feature_generators/ionmob.py diff --git a/ms2rescore/feature_generators/ionmob.py b/ms2rescore/feature_generators/ionmob.py new file mode 100644 index 00000000..b5b31a74 --- /dev/null +++ b/ms2rescore/feature_generators/ionmob.py @@ -0,0 +1,289 @@ +import pandas as pd +import tensorflow as tf +from itertools import chain +import logging + +from ms2rescore.feature_generators._base_classes import FeatureGeneratorBase +from psm_utils import PSMList +from ionmob.preprocess.data import to_tf_dataset_inference +from ionmob.utilities import tokenizer_from_json, reduced_mobility_to_ccs, get_ccs_shift +from ionmob.preprocess.data import calculate_mz +from ionmob.utilities.chemistry import VARIANT_DICT + + +logger = logging.getLogger(__name__) + + +class IonMobFeatureGenerator(FeatureGeneratorBase): + """Ionmob Collision Cross Section (CCS)-based feature generator.""" + + def __init__( + self, + *args, + model: str = "pretrained_models/GRUPredictor", + reference_dataset="example_data/reference.parquet", + tokenizer_filepath="pretrained_models/tokenizers/tokenizer.json", + processes: 1, + **kwargs, + ) -> None: + """ + Ionmob Collision Cross Section (CCS)-based feature generator. 
+ + Parameters + ---------- + #TODO + + """ + super().__init__(*args, **kwargs) + self.model = tf.keras.models.load_model(model) + self.processes = processes + self.reference_dataset = pd.read_parquet(reference_dataset) + self.tokenizer = tokenizer_from_json(tokenizer_filepath) + + @property + def feature_names(self): + return [ + "ccs_predicted", + "ccs_observed", + "ccs_error", + "abs_ccs_error", + "perc_ccs_error", + ] + + @property + def allowed_mods(self): + return [token for aa_tokens in VARIANT_DICT.values() for token in aa_tokens] + + def add_features(self, psm_list: PSMList) -> None: + """ + Add Ionmob-derived features to PSMs. + + Parameters + ---------- + psm_list + PSMs to add features to. + + """ + logger.info("Adding Ionmob-derived features to PSMs.") + psm_dict = psm_list.get_psm_dict() + current_run = 0 + total_runs = len(psm_dict.values()) + + for runs in psm_dict.values(): + for run, psms in runs.items(): + logger.info( + f"Running Ionmob for PSMs from run ({current_run}/{total_runs}): `{run}`..." + ) + psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values()))) + psm_list_run_df = psm_list_run.to_dataframe() + + # prepare dataframes for CCS prediction + psm_list_run_df["charge"] = [ + peptidoform.charge for peptidoform in psm_list_run_df["peptidoform"] + ] + psm_list_run_df = psm_list_run_df[ + psm_list_run_df["charge"] < 5 + ] # predictions do not go higher for ionmob + + psm_list_run_df["sequence-tokenized"] = psm_list_run_df["peptidoform"].apply( + self.proforma_tokenizer, axis=1 + ) + psm_list_run_df = psm_list_run_df[~(self.invalid_mods(psm_list_run_df))] + psm_list_run_df["mz"] = psm_list_run_df.apply( + lambda x: calculate_mz(x["sequence-tokenized"], x["charge"]), axis=1 + ) # use precursor m/z from PSMs? 
+ + psm_list_run_df["ccs_observed"] = psm_list_run_df.apply( + lambda x: reduced_mobility_to_ccs(x["ion_mobility"], x["mz"], x["charge"]), + axis=1, + ) + + # calibrate CCS values + shift_factor = self.calculate_ccs_shift(self, psm_list_run_df) + psm_list_run_df["ccs_observed"] = psm_list_run_df.apply( + lambda r: r["ccs_observed"] + shift_factor, axis=1 + ) + + # predict CCS values + tf_ds = to_tf_dataset_inference( + psm_list_run_df["mz"], + psm_list_run_df["charge"], + psm_list_run_df["sequence-tokenized"], + self.tokenizer, + ) + + psm_list_run_df["ccs_predicted"], _ = self.model.predict(tf_ds) + + # calculate CCS features + ccs_features = self._calculate_features(psm_list_run_df) + + # add CCS features to PSMs + for psm in psms.values(): + try: + psm["rescoring_features"].update(ccs_features[psm.spectrum_id]) + except KeyError: + psm["rescoring_features"].update({}) + current_run += 1 + + def _calculate_features(self, feature_df): + """Get ccs features for PSMs.""" + + ccs_features = {} + for row in feature_df.iterrows(): + ccs_features[row["spectrum_id"]] = { + "ccs_predicted": row["ccs_predicted"], + "ccs_observed": row["ccs_observed"], + "ccs_error": row["ccs_observed"] - row["ccs_predicted"], + "abs_ccs_error": abs(row["ccs_observed"] - row["ccs_predicted"]), + "perc_ccs_error": (row["abs_ccs_error"] / row["ccs_observed"]) * 100, + } + + return ccs_features + + @staticmethod + def proforma_tokenizer(peptidoform): + """ + Tokenize proforma sequence and add modifications. + + Args: + seq (str): Peptide sequence. + peprec_mod (str): Peptide modifications in the format "loc1|mod1|loc2|mod2|...". + + Returns: + list: A list of tokenized and modified peptide sequence. 
+ """ + tokenized_seq = [] + + if peptidoform.properties["n_term"]: + tokenized_seq.append( + f"[UNIMOD:{peptidoform.properties['n_term'].definition['id']}]" + ) + else: + tokenized_seq.append("") + + if peptidoform.properties["c_term"]: + pass # provide if c-term mods are supported + + for amino_acid, modification in peptidoform.parsed_sequence: + tokenized_seq.append(amino_acid) + if modification: + tokenized_seq[-1] = tokenized_seq[-1] + tokenized_seq.append( + f"[UNIMOD:{modification[0].definition['id']}]" + ) + + return tokenized_seq + + def calculate_ccs_shift(self, psm_dataframe): + """ + Apply CCS shift to CCS values. + + Args: + peprec (pandas.DataFrame): Modified and parsed Peprec data. + reference (str): Path to the reference data used for CCS shift calculation. + + Returns: + pandas.DataFrame: Peprec data with CCS values after applying the shift. + """ + df = psm_dataframe.copy() + df["charge"] = [peptidoform.charge for peptidoform in df["peptidoform"]] + high_conf_hits = list( + psm_dataframe["spectrum_id"][psm_dataframe["score"].rank(pct=True) > 0.95] + ) + logger.debug( + f"Number of high confidence hits for calculating shift: {len(high_conf_hits)}" + ) + + shift_factor = get_ccs_shift( + psm_dataframe[psm_dataframe["spec_id"].isin(high_conf_hits)][ + ["charge", "sequence-tokenized", "ccs"] + ], + self.reference_data, + ) + + logger.debug(f"CCS shift factor: {shift_factor}") + + return shift_factor + + def invalid_mods(self, tokenized_seq): + """ + Check if peptide sequence contains invalid modifications. + + Args: + tokenized_seq (list): Tokenized peptide sequence. + + Returns: + bool: True if invalid modifications are present, False otherwise. + """ + for token in tokenized_seq: + if token not in self.allowed_mods: + logger.debug(f"Invalid modification found: {token}") + return True + return False + + +def add_ccs_predictions(peprec, model_path, tokenizer_filepath): + """ + Add CCS predictions to peprec file. 
+ + Args: + peprec (pandas.DataFrame): Modified and parsed Peprec data. + model_path (str): Path to the CCS prediction model. + tokenizer_filepath (str): Path to the tokenizer file. + + Returns: + pandas.DataFrame: Peprec data with added CCS predictions and error values. + """ + logger.info(f"Adding CCS predictions to peprec file") + + tokenizer = tokenizer_from_json(tokenizer_filepath) + tf_ds = to_tf_dataset_inference( + peprec["mz"], peprec["charge"], peprec["sequence-tokenized"], tokenizer + ) + gruModel = tf.keras.models.load_model(model_path) + peprec["ccs_predicted"], _ = gruModel.predict(tf_ds) + + peprec["ccs_error"] = peprec["ccs_observed"] - peprec["ccs_predicted"] + peprec["abs_ccs_error"] = abs(peprec["ccs_observed"] - peprec["ccs_predicted"]) + peprec["perc_ccs_error"] = (peprec["abs_ccs_error"] / peprec["ccs_observed"]) * 100 + + peprec["sequence-tokenized"] = peprec.apply(lambda x: "".join(x["sequence-tokenized"]), axis=1) + + return peprec + + +def write_pin_files(peprec, pin, pin_filepath): + """ + Write pin files. + + Args: + peprec (pandas.DataFrame): Peprec data. + pin (pandas.DataFrame): Percolator In data. + pin_filepath (str): Path to the Percolator In file. 
+ + Returns: + None + """ + ccs_filename, non_ccs_filename = create_filenames(pin_filepath) + ccs_features = [ + "ccs_predicted", + "ccs_observed", + "ccs_error", + "abs_ccs_error", + "perc_ccs_error", + ] + final_pin = pd.merge( + pin, peprec[["spec_id"] + ccs_features], left_on="SpecId", right_on="spec_id" + ).drop("spec_id", axis=1) + + final_pin = final_pin[ + [c for c in final_pin.columns if c not in ["Peptide", "Proteins"]] + + ["Peptide", "Proteins"] + ] + + logger.info(f"Writing pin files to {ccs_filename} and {non_ccs_filename}") + final_pin.to_csv(ccs_filename, sep="\t", index=False, header=True) + redo_pin_tabs(str(ccs_filename)) + final_pin.drop(ccs_features, axis=1).to_csv( + non_ccs_filename, sep="\t", index=False, header=True + ) + redo_pin_tabs(str(non_ccs_filename)) From b3dab9678bccd797d64988094911577e8f1700c6 Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Tue, 5 Sep 2023 12:25:35 +0200 Subject: [PATCH 02/18] ionmob changes --- ms2rescore/feature_generators/__init__.py | 2 + ms2rescore/feature_generators/ionmob.py | 181 ++++++++------------- ms2rescore/package_data/config_schema.json | 27 +++ 3 files changed, 95 insertions(+), 115 deletions(-) diff --git a/ms2rescore/feature_generators/__init__.py b/ms2rescore/feature_generators/__init__.py index dc1931d3..4000c885 100644 --- a/ms2rescore/feature_generators/__init__.py +++ b/ms2rescore/feature_generators/__init__.py @@ -4,10 +4,12 @@ from ms2rescore.feature_generators.deeplc import DeepLCFeatureGenerator from ms2rescore.feature_generators.maxquant import MaxQuantFeatureGenerator from ms2rescore.feature_generators.ms2pip import MS2PIPFeatureGenerator +from ms2rescore.feature_generators.ionmob import IonMobFeatureGenerator FEATURE_GENERATORS = { "basic": BasicFeatureGenerator, "ms2pip": MS2PIPFeatureGenerator, "deeplc": DeepLCFeatureGenerator, "maxquant": MaxQuantFeatureGenerator, + "ionmob": IonMobFeatureGenerator, } diff --git a/ms2rescore/feature_generators/ionmob.py 
b/ms2rescore/feature_generators/ionmob.py index b5b31a74..3c4a588a 100644 --- a/ms2rescore/feature_generators/ionmob.py +++ b/ms2rescore/feature_generators/ionmob.py @@ -2,17 +2,31 @@ import tensorflow as tf from itertools import chain import logging +import os from ms2rescore.feature_generators._base_classes import FeatureGeneratorBase from psm_utils import PSMList from ionmob.preprocess.data import to_tf_dataset_inference -from ionmob.utilities import tokenizer_from_json, reduced_mobility_to_ccs, get_ccs_shift -from ionmob.preprocess.data import calculate_mz -from ionmob.utilities.chemistry import VARIANT_DICT +from ionmob.utilities.utility import get_ccs_shift +from ionmob.utilities.tokenization import tokenizer_from_json +from ionmob.utilities.chemistry import VARIANT_DICT, reduced_mobility_to_ccs, calculate_mz +import ionmob logger = logging.getLogger(__name__) +ionmob_dir = os.path.dirname(os.path.realpath(ionmob.__file__)) +DEFAULT_MODELS_IONMOB = { + "ionmob/pretrained_models/DeepTwoMerModel", + "ionmob/pretrained_models/GRUPredictor", + "ionmob/pretrained_models/SqrtModel", +} +DEFAULT_MODELS_DICT = { + mod.split("/")[1]: os.path.join(ionmob_dir, mod) for mod in DEFAULT_MODELS_IONMOB +} +DEFAULT_TOKENIZER = os.path.join(ionmob_dir, "pretrained_models/tokenizer.json") +DEFAULT_REFERENCE_DATASET = os.path.join(ionmob_dir, "pretrained_models/Tenzer_unimod.parquet") + class IonMobFeatureGenerator(FeatureGeneratorBase): """Ionmob Collision Cross Section (CCS)-based feature generator.""" @@ -20,10 +34,9 @@ class IonMobFeatureGenerator(FeatureGeneratorBase): def __init__( self, *args, - model: str = "pretrained_models/GRUPredictor", - reference_dataset="example_data/reference.parquet", - tokenizer_filepath="pretrained_models/tokenizers/tokenizer.json", - processes: 1, + ionmob_model: str = "GRUPredictor", + reference_dataset: str = DEFAULT_REFERENCE_DATASET, + tokenizer: str = DEFAULT_TOKENIZER, **kwargs, ) -> None: """ @@ -35,10 +48,13 @@ def __init__( """ 
super().__init__(*args, **kwargs) - self.model = tf.keras.models.load_model(model) - self.processes = processes + try: + self.ionmob_model = tf.keras.models.load_model(DEFAULT_MODELS_DICT[ionmob_model]) + except KeyError: + self.ionmob_model = tf.keras.models.load_model(ionmob_model) + self.reference_dataset = pd.read_parquet(reference_dataset) - self.tokenizer = tokenizer_from_json(tokenizer_filepath) + self.tokenizer = tokenizer_from_json(tokenizer) @property def feature_names(self): @@ -79,16 +95,22 @@ def add_features(self, psm_list: PSMList) -> None: # prepare dataframes for CCS prediction psm_list_run_df["charge"] = [ - peptidoform.charge for peptidoform in psm_list_run_df["peptidoform"] + peptidoform.precursor_charge for peptidoform in psm_list_run_df["peptidoform"] ] psm_list_run_df = psm_list_run_df[ psm_list_run_df["charge"] < 5 ] # predictions do not go higher for ionmob - psm_list_run_df["sequence-tokenized"] = psm_list_run_df["peptidoform"].apply( - self.proforma_tokenizer, axis=1 + psm_list_run_df["sequence-tokenized"] = psm_list_run_df.apply( + lambda x: self.tokenize_peptidoform(x["peptidoform"]), axis=1 ) - psm_list_run_df = psm_list_run_df[~(self.invalid_mods(psm_list_run_df))] + psm_list_run_df = psm_list_run_df[ + psm_list_run_df.apply( + lambda x: self._is_valid_tokenized_sequence(x["sequence-tokenized"]), + axis=1, + ) + ] + psm_list_run_df["mz"] = psm_list_run_df.apply( lambda x: calculate_mz(x["sequence-tokenized"], x["charge"]), axis=1 ) # use precursor m/z from PSMs? 
@@ -97,13 +119,11 @@ def add_features(self, psm_list: PSMList) -> None: lambda x: reduced_mobility_to_ccs(x["ion_mobility"], x["mz"], x["charge"]), axis=1, ) - # calibrate CCS values - shift_factor = self.calculate_ccs_shift(self, psm_list_run_df) + shift_factor = self.calculate_ccs_shift(psm_list_run_df) psm_list_run_df["ccs_observed"] = psm_list_run_df.apply( - lambda r: r["ccs_observed"] + shift_factor, axis=1 + lambda x: x["ccs_observed"] + shift_factor, axis=1 ) - # predict CCS values tf_ds = to_tf_dataset_inference( psm_list_run_df["mz"], @@ -112,13 +132,13 @@ def add_features(self, psm_list: PSMList) -> None: self.tokenizer, ) - psm_list_run_df["ccs_predicted"], _ = self.model.predict(tf_ds) + psm_list_run_df["ccs_predicted"], _ = self.ionmob_model.predict(tf_ds) # calculate CCS features ccs_features = self._calculate_features(psm_list_run_df) # add CCS features to PSMs - for psm in psms.values(): + for psm in psm_list_run: try: psm["rescoring_features"].update(ccs_features[psm.spectrum_id]) except KeyError: @@ -129,19 +149,20 @@ def _calculate_features(self, feature_df): """Get ccs features for PSMs.""" ccs_features = {} - for row in feature_df.iterrows(): - ccs_features[row["spectrum_id"]] = { - "ccs_predicted": row["ccs_predicted"], - "ccs_observed": row["ccs_observed"], - "ccs_error": row["ccs_observed"] - row["ccs_predicted"], - "abs_ccs_error": abs(row["ccs_observed"] - row["ccs_predicted"]), - "perc_ccs_error": (row["abs_ccs_error"] / row["ccs_observed"]) * 100, + for row in feature_df.itertuples(): + ccs_features[row.spectrum_id] = { + "ccs_predicted": row.ccs_predicted, + "ccs_observed": row.ccs_observed, + "ccs_error": row.ccs_observed - row.ccs_predicted, + "abs_ccs_error": abs(row.ccs_observed - row.ccs_predicted), + "perc_ccs_error": ((abs(row.ccs_observed - row.ccs_predicted)) / row.ccs_observed) + * 100, } return ccs_features @staticmethod - def proforma_tokenizer(peptidoform): + def tokenize_peptidoform(peptidoform): """ Tokenize proforma 
sequence and add modifications. @@ -156,21 +177,23 @@ def proforma_tokenizer(peptidoform): if peptidoform.properties["n_term"]: tokenized_seq.append( - f"[UNIMOD:{peptidoform.properties['n_term'].definition['id']}]" + f"[UNIMOD:{peptidoform.properties['n_term'][0].definition['id']}]" ) else: tokenized_seq.append("") - if peptidoform.properties["c_term"]: - pass # provide if c-term mods are supported - for amino_acid, modification in peptidoform.parsed_sequence: tokenized_seq.append(amino_acid) if modification: - tokenized_seq[-1] = tokenized_seq[-1] + tokenized_seq.append( - f"[UNIMOD:{modification[0].definition['id']}]" + tokenized_seq[-1] = ( + tokenized_seq[-1] + f"[UNIMOD:{modification[0].definition['id']}]" ) + if peptidoform.properties["c_term"]: + pass # provide if c-term mods are supported + else: + tokenized_seq.append("") + return tokenized_seq def calculate_ccs_shift(self, psm_dataframe): @@ -185,105 +208,33 @@ def calculate_ccs_shift(self, psm_dataframe): pandas.DataFrame: Peprec data with CCS values after applying the shift. 
""" df = psm_dataframe.copy() - df["charge"] = [peptidoform.charge for peptidoform in df["peptidoform"]] - high_conf_hits = list( - psm_dataframe["spectrum_id"][psm_dataframe["score"].rank(pct=True) > 0.95] - ) + df.rename({"ccs_observed": "ccs"}, axis=1, inplace=True) + high_conf_hits = list(df["spectrum_id"][df["score"].rank(pct=True) > 0.95]) logger.debug( f"Number of high confidence hits for calculating shift: {len(high_conf_hits)}" ) shift_factor = get_ccs_shift( - psm_dataframe[psm_dataframe["spec_id"].isin(high_conf_hits)][ - ["charge", "sequence-tokenized", "ccs"] - ], - self.reference_data, + df[["charge", "sequence-tokenized", "ccs"]][df["spectrum_id"].isin(high_conf_hits)], + self.reference_dataset, ) logger.debug(f"CCS shift factor: {shift_factor}") return shift_factor - def invalid_mods(self, tokenized_seq): + def _is_valid_tokenized_sequence(self, tokenized_seq): """ - Check if peptide sequence contains invalid modifications. + Check if peptide sequence contains invalid tokens. Args: tokenized_seq (list): Tokenized peptide sequence. Returns: - bool: True if invalid modifications are present, False otherwise. + bool: False if invalid tokens are present, True otherwise. """ for token in tokenized_seq: if token not in self.allowed_mods: logger.debug(f"Invalid modification found: {token}") - return True - return False - - -def add_ccs_predictions(peprec, model_path, tokenizer_filepath): - """ - Add CCS predictions to peprec file. - - Args: - peprec (pandas.DataFrame): Modified and parsed Peprec data. - model_path (str): Path to the CCS prediction model. - tokenizer_filepath (str): Path to the tokenizer file. - - Returns: - pandas.DataFrame: Peprec data with added CCS predictions and error values. 
- """ - logger.info(f"Adding CCS predictions to peprec file") - - tokenizer = tokenizer_from_json(tokenizer_filepath) - tf_ds = to_tf_dataset_inference( - peprec["mz"], peprec["charge"], peprec["sequence-tokenized"], tokenizer - ) - gruModel = tf.keras.models.load_model(model_path) - peprec["ccs_predicted"], _ = gruModel.predict(tf_ds) - - peprec["ccs_error"] = peprec["ccs_observed"] - peprec["ccs_predicted"] - peprec["abs_ccs_error"] = abs(peprec["ccs_observed"] - peprec["ccs_predicted"]) - peprec["perc_ccs_error"] = (peprec["abs_ccs_error"] / peprec["ccs_observed"]) * 100 - - peprec["sequence-tokenized"] = peprec.apply(lambda x: "".join(x["sequence-tokenized"]), axis=1) - - return peprec - - -def write_pin_files(peprec, pin, pin_filepath): - """ - Write pin files. - - Args: - peprec (pandas.DataFrame): Peprec data. - pin (pandas.DataFrame): Percolator In data. - pin_filepath (str): Path to the Percolator In file. - - Returns: - None - """ - ccs_filename, non_ccs_filename = create_filenames(pin_filepath) - ccs_features = [ - "ccs_predicted", - "ccs_observed", - "ccs_error", - "abs_ccs_error", - "perc_ccs_error", - ] - final_pin = pd.merge( - pin, peprec[["spec_id"] + ccs_features], left_on="SpecId", right_on="spec_id" - ).drop("spec_id", axis=1) - - final_pin = final_pin[ - [c for c in final_pin.columns if c not in ["Peptide", "Proteins"]] - + ["Peptide", "Proteins"] - ] - - logger.info(f"Writing pin files to {ccs_filename} and {non_ccs_filename}") - final_pin.to_csv(ccs_filename, sep="\t", index=False, header=True) - redo_pin_tabs(str(ccs_filename)) - final_pin.drop(ccs_features, axis=1).to_csv( - non_ccs_filename, sep="\t", index=False, header=True - ) - redo_pin_tabs(str(non_ccs_filename)) + return False + return True diff --git a/ms2rescore/package_data/config_schema.json b/ms2rescore/package_data/config_schema.json index 27499f10..786e3985 100644 --- a/ms2rescore/package_data/config_schema.json +++ b/ms2rescore/package_data/config_schema.json @@ -27,6 +27,9 
@@ }, "maxquant": { "$ref": "#/$defs/feature_generator/$defs/maxquant" + }, + "ionmob": { + "$ref": "#/$defs/feature_generator/$defs/ionmob" } } }, @@ -182,6 +185,30 @@ "description": "MaxQuant feature generator configuration", "type": "object", "additionalProperties": true + }, + "ionmob": { + "$schema": "#/definitions/feature_generator", + "description": "Ion mobility feature generator configuration using Ionmob", + "type": "object", + "additionalProperties": true, + "properties": { + "ionmob_model": { + "description": "Path to Ionmob model directory", + "type": "string", + "default": "GRUPredictor" + }, + "reference_dataset": { + "description": "Path to Ionmob reference dataset file", + "type": "string", + "default": "Meier_unimod.parquet" + }, + "tokenizer": + { + "description": "Path to tokenizer json file", + "type": "string", + "default": "tokenizer.json" + } + } } } }, From 1c4264b74f1d3572e68103b2403119983b1a4f0f Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Tue, 12 Sep 2023 13:50:55 +0200 Subject: [PATCH 03/18] ionmob ft generator internal paths --- ms2rescore/feature_generators/ionmob.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/ms2rescore/feature_generators/ionmob.py b/ms2rescore/feature_generators/ionmob.py index 3c4a588a..c1f06988 100644 --- a/ms2rescore/feature_generators/ionmob.py +++ b/ms2rescore/feature_generators/ionmob.py @@ -3,6 +3,7 @@ from itertools import chain import logging import os +from pathlib import Path from ms2rescore.feature_generators._base_classes import FeatureGeneratorBase from psm_utils import PSMList @@ -15,17 +16,18 @@ logger = logging.getLogger(__name__) -ionmob_dir = os.path.dirname(os.path.realpath(ionmob.__file__)) +ionmob_dir = Path(ionmob.__file__).parent + DEFAULT_MODELS_IONMOB = { - "ionmob/pretrained_models/DeepTwoMerModel", - "ionmob/pretrained_models/GRUPredictor", - "ionmob/pretrained_models/SqrtModel", + Path("pretrained_models/DeepTwoMerModel"), + 
Path("pretrained_models/GRUPredictor"), + Path("pretrained_models/SqrtModel"), } DEFAULT_MODELS_DICT = { - mod.split("/")[1]: os.path.join(ionmob_dir, mod) for mod in DEFAULT_MODELS_IONMOB + str(mod_path.stem): ionmob_dir.joinpath(mod_path) for mod_path in DEFAULT_MODELS_IONMOB } -DEFAULT_TOKENIZER = os.path.join(ionmob_dir, "pretrained_models/tokenizer.json") -DEFAULT_REFERENCE_DATASET = os.path.join(ionmob_dir, "pretrained_models/Tenzer_unimod.parquet") +DEFAULT_TOKENIZER = ionmob_dir.joinpath("pretrained_models/tokenizers/tokenizer.json") +DEFAULT_REFERENCE_DATASET = ionmob_dir.joinpath("example_data/Tenzer_unimod.parquet") class IonMobFeatureGenerator(FeatureGeneratorBase): @@ -49,7 +51,9 @@ def __init__( """ super().__init__(*args, **kwargs) try: - self.ionmob_model = tf.keras.models.load_model(DEFAULT_MODELS_DICT[ionmob_model]) + self.ionmob_model = tf.keras.models.load_model( + DEFAULT_MODELS_DICT[ionmob_model].__str__() + ) except KeyError: self.ionmob_model = tf.keras.models.load_model(ionmob_model) From e9fb29ff908e2bca1680e5352949cfffef5edff2 Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Tue, 12 Sep 2023 13:51:06 +0200 Subject: [PATCH 04/18] added plotply pyproject --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9bdfcc3c..7bd97507 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ dependencies = [ "customtkinter>=5,<6", "mokapot>=0.9", "pydantic>=1.8.2,<2", # Fix compatibility with v2 in psm_utils + "plotly>=5.3.1,<6", ] [project.optional-dependencies] @@ -61,7 +62,7 @@ doc = [ "sphinx_rtd_theme", "sphinx-autobuild", ] -report = ["jinja2", "plotly"] +report = ["jinja2"] [project.urls] GitHub = "https://github.com/compomics/ms2rescore" From 1107187985c7b52fd2258efc303ab121492e84b5 Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Tue, 12 Sep 2023 17:13:13 +0200 Subject: [PATCH 05/18] add ionmob to gui --- ms2rescore.spec | 2 +- ms2rescore/gui/app.py | 36 
++++++++++++++++++++++++++++++++++++ ms2rescore/gui/widgets.py | 17 ++++++++++++++--- 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/ms2rescore.spec b/ms2rescore.spec index 76bc4a2c..700a0fdb 100644 --- a/ms2rescore.spec +++ b/ms2rescore.spec @@ -9,7 +9,7 @@ from ms2rescore import __version__ # Package info exe_name = "ms2rescore" -script_name = "ms2rescore/gui.py" +script_name = "ms2rescore/gui/app.py" icon = "./img/ms2rescore.ico" location = os.getcwd() project = "ms2rescore" diff --git a/ms2rescore/gui/app.py b/ms2rescore/gui/app.py index ecad6316..5e493a30 100644 --- a/ms2rescore/gui/app.py +++ b/ms2rescore/gui/app.py @@ -33,6 +33,7 @@ try: import matplotlib.pyplot as plt + plt.set_loglevel("warning") except ImportError: pass @@ -315,15 +316,21 @@ def __init__(self, *args, **kwargs): self.deeplc_config = DeepLCConfiguration(self) self.deeplc_config.grid(row=1, column=0, pady=(0, 20), sticky="nsew") + self.ionmob_config = IonmobConfiguration(self) + self.ionmob_config.grid(row=1, column=0, pady=(0, 20), sticky="nsew") + def get(self) -> Dict: """Return the configuration as a dictionary.""" ms2pip_enabled, ms2pip_config = self.ms2pip_config.get() deeplc_enabled, deeplc_config = self.deeplc_config.get() + ionmob_enabled, ionmob_config = self.ionmob_config.get() config = {} if ms2pip_enabled: config["ms2pip"] = ms2pip_config if deeplc_enabled: config["deeplc"] = deeplc_config + if ionmob_enabled: + config["ionmob"] = ionmob_config return config @@ -410,6 +417,35 @@ def get(self) -> Dict: return enabled, config +class IonmobConfiguration(ctk.CTkFrame): + def __init__(self, *args, **kwargs): + """IonMob configuration frame.""" + super().__init__(*args, **kwargs) + + self.configure(fg_color="transparent") + self.grid._columnconfigure(0, weight=1) + + self.title = widgets.Heading(self, text="Ionmob") + self.title.grid(row=0, column=0, columnspan=2, pady=(0, 5), sticky="ew") + + self.enabled = widgets.LabeledSwitch(self, label="Enable Ionmob", 
default=True) + self.enabled.grid(row=1, column=0, pady=(0, 10), sticky="nsew") + + self.model = widgets.LabeledEntry( + self, + label="Name of built-in model or path to custom model", + placeholder_text="GRUPredictor", + default_value="GRUPredictor", + ) + self.model.grid(row=3, column=0, pady=(0, 10), sticky="nsew") + + def get(self) -> Dict: + """Return the configuration as a dictionary.""" + enabled = self.enabled.get() + config = {"model": self.model.get()} + return enabled, config + + class RescoringEngineConfig(ctk.CTkFrame): def __init__(self, *args, **kwargs): """Rescoring engine configuration frame.""" diff --git a/ms2rescore/gui/widgets.py b/ms2rescore/gui/widgets.py index d4b4a4f6..36526451 100644 --- a/ms2rescore/gui/widgets.py +++ b/ms2rescore/gui/widgets.py @@ -18,18 +18,29 @@ def __init__(self, *args, **kwargs): class LabeledEntry(ctk.CTkFrame): - def __init__(self, *args, label="Enter text", placeholder_text="Enter text...", **kwargs): + def __init__( + self, + *args, + label="Enter text", + placeholder_text="Enter text...", + default_value="", + **kwargs, + ): super().__init__(*args, **kwargs) self.grid_columnconfigure(0, weight=1) + self._variable = ctk.StringVar(value=default_value) + self._label = ctk.CTkLabel(self, text=label) self._label.grid(row=0, column=0, padx=0, pady=5, sticky="w") - self._entry = ctk.CTkEntry(self, placeholder_text=placeholder_text) + self._entry = ctk.CTkEntry( + self, placeholder_text=placeholder_text, textvariable=self._variable + ) self._entry.grid(row=0, column=1, padx=0, pady=5, sticky="e") def get(self): - return self._entry.get() + return self._variable.get() class LabeledEntryTextbox(ctk.CTkFrame): From 7081e2407ad4b048c0e0714bacb0ddef0e10d4e2 Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Tue, 12 Sep 2023 18:12:30 +0200 Subject: [PATCH 06/18] ionmob generator fixes --- ms2rescore/feature_generators/ionmob.py | 3 +-- ms2rescore/gui/app.py | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff 
--git a/ms2rescore/feature_generators/ionmob.py b/ms2rescore/feature_generators/ionmob.py index c1f06988..59e96987 100644 --- a/ms2rescore/feature_generators/ionmob.py +++ b/ms2rescore/feature_generators/ionmob.py @@ -2,10 +2,9 @@ import tensorflow as tf from itertools import chain import logging -import os from pathlib import Path -from ms2rescore.feature_generators._base_classes import FeatureGeneratorBase +from ms2rescore.feature_generators.base import FeatureGeneratorBase from psm_utils import PSMList from ionmob.preprocess.data import to_tf_dataset_inference from ionmob.utilities.utility import get_ccs_shift diff --git a/ms2rescore/gui/app.py b/ms2rescore/gui/app.py index 794bee8e..306857eb 100644 --- a/ms2rescore/gui/app.py +++ b/ms2rescore/gui/app.py @@ -353,7 +353,7 @@ def __init__(self, *args, **kwargs): self.deeplc_config.grid(row=1, column=0, pady=(0, 20), sticky="nsew") self.ionmob_config = IonmobConfiguration(self) - self.ionmob_config.grid(row=1, column=0, pady=(0, 20), sticky="nsew") + self.ionmob_config.grid(row=2, column=0, pady=(0, 20), sticky="nsew") def get(self) -> Dict: """Return the configuration as a dictionary.""" @@ -459,7 +459,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.configure(fg_color="transparent") - self.grid._columnconfigure(0, weight=1) + self.grid_columnconfigure(0, weight=1) self.title = widgets.Heading(self, text="Ionmob") self.title.grid(row=0, column=0, columnspan=2, pady=(0, 5), sticky="ew") @@ -478,7 +478,7 @@ def __init__(self, *args, **kwargs): def get(self) -> Dict: """Return the configuration as a dictionary.""" enabled = self.enabled.get() - config = {"model": self.model.get()} + config = {"ionmob_model": self.model.get()} return enabled, config From 7f32dd19797b0fbc92e9d347e2c7c73fcbdb1715 Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Wed, 13 Sep 2023 00:13:44 +0200 Subject: [PATCH 07/18] fixed total runs deeplc --- ms2rescore/feature_generators/deeplc.py | 5 +++-- 1 file 
changed, 3 insertions(+), 2 deletions(-) diff --git a/ms2rescore/feature_generators/deeplc.py b/ms2rescore/feature_generators/deeplc.py index b17bf344..073e0e4f 100644 --- a/ms2rescore/feature_generators/deeplc.py +++ b/ms2rescore/feature_generators/deeplc.py @@ -127,9 +127,10 @@ def add_features(self, psm_list: PSMList) -> None: # Get easy-access nested version of PSMList psm_dict = psm_list.get_psm_dict() - # Run MS²PIP for each spectrum file - total_runs = len(psm_dict.values()) + # Run DeepLC for each spectrum file current_run = 1 + total_runs = len(list(chain.from_iterable([runs.keys() for runs in psm_dict.values()]))) + for runs in psm_dict.values(): # Reset DeepLC predictor for each collection of runs self.deeplc_predictor = None From 71352dc87c7d29ec42f82795c37337d19ec406e0 Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Wed, 13 Sep 2023 00:14:12 +0200 Subject: [PATCH 08/18] fixed ionmob runs --- ms2rescore/feature_generators/ionmob.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ms2rescore/feature_generators/ionmob.py b/ms2rescore/feature_generators/ionmob.py index 59e96987..1358e0e6 100644 --- a/ms2rescore/feature_generators/ionmob.py +++ b/ms2rescore/feature_generators/ionmob.py @@ -85,8 +85,8 @@ def add_features(self, psm_list: PSMList) -> None: """ logger.info("Adding Ionmob-derived features to PSMs.") psm_dict = psm_list.get_psm_dict() - current_run = 0 - total_runs = len(psm_dict.values()) + current_run = 1 + total_runs = len(list(chain.from_iterable([runs.keys() for runs in psm_dict.values()]))) for runs in psm_dict.values(): for run, psms in runs.items(): From e7e8bd0f60800179d7a49f97092af59da4ed1faa Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Wed, 13 Sep 2023 00:14:25 +0200 Subject: [PATCH 09/18] changes to root mainloop --- ms2rescore/gui/app.py | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/ms2rescore/gui/app.py b/ms2rescore/gui/app.py index 
306857eb..121980b0 100644 --- a/ms2rescore/gui/app.py +++ b/ms2rescore/gui/app.py @@ -4,6 +4,7 @@ import logging import multiprocessing import os +import sys import webbrowser from pathlib import Path from typing import Dict, List, Tuple @@ -346,21 +347,27 @@ def __init__(self, *args, **kwargs): self.configure(fg_color="transparent") self.grid_columnconfigure(0, weight=1) + self.basic_config = BasicFeatureConfiguration(self) + self.basic_config.grid(row=0, column=0, pady=(0, 20), sticky="nsew") + self.ms2pip_config = MS2PIPConfiguration(self) - self.ms2pip_config.grid(row=0, column=0, pady=(0, 20), sticky="nsew") + self.ms2pip_config.grid(row=1, column=0, pady=(0, 20), sticky="nsew") self.deeplc_config = DeepLCConfiguration(self) - self.deeplc_config.grid(row=1, column=0, pady=(0, 20), sticky="nsew") + self.deeplc_config.grid(row=2, column=0, pady=(0, 20), sticky="nsew") self.ionmob_config = IonmobConfiguration(self) - self.ionmob_config.grid(row=2, column=0, pady=(0, 20), sticky="nsew") + self.ionmob_config.grid(row=3, column=0, pady=(0, 20), sticky="nsew") def get(self) -> Dict: """Return the configuration as a dictionary.""" + basic_enabled, basic_config = self.basic_config.get() ms2pip_enabled, ms2pip_config = self.ms2pip_config.get() deeplc_enabled, deeplc_config = self.deeplc_config.get() ionmob_enabled, ionmob_config = self.ionmob_config.get() config = {} + if basic_enabled: + config["basic"] = basic_config if ms2pip_enabled: config["ms2pip"] = ms2pip_config if deeplc_enabled: @@ -370,6 +377,27 @@ def get(self) -> Dict: return config +class BasicFeatureConfiguration(ctk.CTkFrame): + def __init__(self, *args, **kwargs): + """Basic configuration frame.""" + super().__init__(*args, **kwargs) + + self.configure(fg_color="transparent") + self.grid_columnconfigure(0, weight=1) + + self.title = widgets.Heading(self, text="Basic features") + self.title.grid(row=0, column=0, columnspan=2, pady=(0, 5), sticky="ew") + + self.enabled = widgets.LabeledSwitch(self, 
label="Enable Basic features", default=True) + self.enabled.grid(row=1, column=0, pady=(0, 10), sticky="nsew") + + def get(self) -> Dict: + """Return the configuration as a dictionary.""" + enabled = self.enabled.get() + config = {} + return enabled, config + + class MS2PIPConfiguration(ctk.CTkFrame): def __init__(self, *args, **kwargs): """MS²PIP configuration frame.""" @@ -500,7 +528,7 @@ def __init__(self, *args, **kwargs): def get(self) -> Dict: """Return the configuration as a dictionary.""" - return {"rescoring_engine": {self.radio_button.get().lower(): {}}} + return {self.radio_button.get().lower(): {}} def function(config): @@ -517,7 +545,7 @@ def app(): config_frame=ConfigFrame, function=function, ) - + root.protocol("WM_DELETE_WINDOW", sys.exit) root.geometry(f"{1250}x{700}") root.minsize(1000, 700) root.title("MS²Rescore") From 81f8ee9afbc360d3983ef918a91325f761e7490e Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Wed, 13 Sep 2023 17:57:21 +0200 Subject: [PATCH 10/18] update gui and minor fixes --- ms2rescore/feature_generators/deeplc.py | 2 +- ms2rescore/gui/app.py | 72 +++++++++++++++++++++++-- ms2rescore/parse_mgf.py | 1 - pyproject.toml | 5 +- 4 files changed, 71 insertions(+), 9 deletions(-) diff --git a/ms2rescore/feature_generators/deeplc.py b/ms2rescore/feature_generators/deeplc.py index 073e0e4f..4ce926f3 100644 --- a/ms2rescore/feature_generators/deeplc.py +++ b/ms2rescore/feature_generators/deeplc.py @@ -217,7 +217,7 @@ def add_features(self, psm_list: PSMList) -> None: psm["rescoring_features"].update( peptide_rt_diff_dict[psm.peptidoform.proforma.split("\\")[0]] ) - current_run += 1 + current_run += 1 # TODO: Remove when DeepLC supports PSMList directly @staticmethod diff --git a/ms2rescore/gui/app.py b/ms2rescore/gui/app.py index 121980b0..03e8dcfc 100644 --- a/ms2rescore/gui/app.py +++ b/ms2rescore/gui/app.py @@ -301,19 +301,24 @@ def __init__(self, *args, **kwargs): self.usi = widgets.LabeledSwitch(self, label="Rename PSM IDs to 
their USI") self.usi.grid(row=1, column=0, pady=(0, 10), sticky="nsew") + self.generate_report = widgets.LabeledSwitch( + self, label="Generate MS²Rescore report", default=True + ) + self.generate_report.grid(row=2, column=0, pady=(0, 10), sticky="nsew") + self.id_decoy_pattern = widgets.LabeledEntry(self, label="Decoy protein regex pattern") - self.id_decoy_pattern.grid(row=2, column=0, pady=(0, 10), sticky="nsew") + self.id_decoy_pattern.grid(row=3, column=0, pady=(0, 10), sticky="nsew") self.psm_id_pattern = widgets.LabeledEntry(self, label="PSM ID regex pattern") - self.psm_id_pattern.grid(row=3, column=0, pady=(0, 10), sticky="nsew") + self.psm_id_pattern.grid(row=4, column=0, pady=(0, 10), sticky="nsew") self.spectrum_id_pattern = widgets.LabeledEntry(self, label="Spectrum ID regex pattern") - self.spectrum_id_pattern.grid(row=4, column=0, pady=(0, 10), sticky="nsew") + self.spectrum_id_pattern.grid(row=5, column=0, pady=(0, 10), sticky="nsew") self.weightsfile = widgets.LabeledFileSelect( self, label="Pretrained Percolator weights", file_option="openfile" ) - self.weightsfile.grid(row=5, column=0, columnspan=2, sticky="nsew") + self.weightsfile.grid(row=6, column=0, columnspan=2, sticky="nsew") self.file_prefix = widgets.LabeledFileSelect( self, label="Filename for output files", file_option="savefile" @@ -336,6 +341,7 @@ def get(self) -> Dict: "weightsfile": self.weightsfile.get(), "output_path": self.file_prefix.get(), "config_file": self.config_file.get(), + "write_report": self.generate_report.get(), } @@ -526,9 +532,65 @@ def __init__(self, *args, **kwargs): ) self.radio_button.grid(row=0, column=0, pady=(0, 10), sticky="nsew") + self.mokapot_config = MokapotRescoringConfiguration(self) + self.mokapot_config.grid(row=1, column=0, pady=(0, 10), sticky="nsew") + + self.percolator_config = PercolatorRescoringConfiguration(self) + self.percolator_config.grid(row=2, column=0, pady=(0, 10), sticky="nsew") + + def get(self) -> Dict: + """Return the 
configuration as a dictionary.""" + if self.radio_button.get().lower() == "mokapot": + return {self.radio_button.get().lower(): self.mokapot_config.get()} + elif self.radio_button.get().lower() == "percolator": + return {self.radio_button.get().lower(): self.percolator_config.get()} + + +class MokapotRescoringConfiguration(ctk.CTkFrame): + def __init__(self, *args, **kwargs): + """Rescoring engine configuration frame.""" + super().__init__(*args, **kwargs) + + self.configure(fg_color="transparent") + self.grid_columnconfigure(0, weight=1) + + self.title = widgets.Heading(self, text="Mokapot configuration") + self.title.grid(row=0, column=0, columnspan=2, pady=(0, 5), sticky="ew") + + self.write_weights = widgets.LabeledSwitch(self, label="Write weightsfile", default=True) + self.write_weights.grid(row=1, column=0, pady=(0, 10), sticky="nsew") + + self.write_txt = widgets.LabeledSwitch(self, label="Write txt output file", default=True) + self.write_txt.grid(row=2, column=0, pady=(0, 10), sticky="nsew") + + self.write_flashlfq = widgets.LabeledSwitch(self, label="Write flashlfq", default=False) + self.write_flashlfq.grid(row=3, column=0, pady=(0, 10), sticky="nsew") + def get(self) -> Dict: """Return the configuration as a dictionary.""" - return {self.radio_button.get().lower(): {}} + config = { + "write_weights": self.write_weights.get(), + "write_txt": self.write_txt.get(), + "write_flashlfq": self.write_flashlfq.get(), + } + return config + + +class PercolatorRescoringConfiguration(ctk.CTkFrame): + def __init__(self, *args, **kwargs): + """Rescoring engine configuration frame.""" + super().__init__(*args, **kwargs) + + self.configure(fg_color="transparent") + self.grid_columnconfigure(0, weight=1) + + self.title = widgets.Heading(self, text="Percolator configuration") + self.title.grid(row=0, column=0, columnspan=2, pady=(0, 5), sticky="ew") + + def get(self) -> Dict: + """Return the configuration as a dictionary.""" + config = {} + return config def function(config): 
diff --git a/ms2rescore/parse_mgf.py b/ms2rescore/parse_mgf.py index 1c25c125..abc422e0 100644 --- a/ms2rescore/parse_mgf.py +++ b/ms2rescore/parse_mgf.py @@ -35,7 +35,6 @@ def parse_mgf_title_rt(path_to_mgf: Union[str, os.PathLike]) -> Dict[str, float] rt = None retention_times[title] = rt - print(retention_times) if any(list(retention_times.values())): return retention_times else: diff --git a/pyproject.toml b/pyproject.toml index 63492ecd..f72f3052 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,10 +37,10 @@ dependencies = [ "rich>=12", "pyteomics>=4.1.0,<5", "lxml>=4.5,<5", - "ms2pip>=4.0.0-dev,<5", + "ms2pip>=4.0.0-dev1,<5", "click>=7", "cascade-config>=0.4.0,<2", - "deeplc>=1.2.1", + "deeplc>=2.2.1", "deeplcretrainer==0.1.17", # TODO: Release version pin "tomli>=2; python_version < '3.11'", "psm_utils>=0.3", @@ -49,6 +49,7 @@ dependencies = [ "pydantic>=1.8.2,<2", # Fix compatibility with v2 in psm_utils "jinja2>=3", "plotly>=5", + "ionmob", ] [project.optional-dependencies] From 6ecd37894771127c3d0a4a6b2ebec0739a2bd05e Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Thu, 14 Sep 2023 09:39:31 +0200 Subject: [PATCH 11/18] adjusted spec file --- ms2rescore.spec | 2 +- ms2rescore/feature_generators/ms2pip.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ms2rescore.spec b/ms2rescore.spec index 700a0fdb..c8c50b52 100644 --- a/ms2rescore.spec +++ b/ms2rescore.spec @@ -9,7 +9,7 @@ from ms2rescore import __version__ # Package info exe_name = "ms2rescore" -script_name = "ms2rescore/gui/app.py" +script_name = "ms2rescore/gui/__main__.py" icon = "./img/ms2rescore.ico" location = os.getcwd() project = "ms2rescore" diff --git a/ms2rescore/feature_generators/ms2pip.py b/ms2rescore/feature_generators/ms2pip.py index c4fdf39a..d3829501 100644 --- a/ms2rescore/feature_generators/ms2pip.py +++ b/ms2rescore/feature_generators/ms2pip.py @@ -173,9 +173,15 @@ def add_features(self, psm_list: PSMList) -> None: """ logger.info("Adding 
MS²PIP-derived features to PSMs.") - for runs in psm_list.get_psm_dict().values(): + psm_dict = psm_list.get_psm_dict() + current_run = 1 + total_runs = len(list(chain.from_iterable([runs.keys() for runs in psm_dict.values()]))) + + for runs in psm_dict.values(): for run, psms in runs.items(): - logger.info(f"Running MS²PIP for PSMs from run `{run}`...") + logger.info( + f"Running MS²PIP for PSMs from run ({current_run}/{total_runs}) `{run}`..." + ) psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values()))) spectrum_filename = infer_spectrum_path(self.spectrum_path, run) logger.debug(f"Using spectrum file `{spectrum_filename}`") From 64e615bb9928299928adc08ee4495207e768cd65 Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Thu, 14 Sep 2023 11:17:11 +0200 Subject: [PATCH 12/18] contextlib addition ionmob, logging fix ms2pip --- ms2rescore/feature_generators/ionmob.py | 106 +++++++++++++----------- ms2rescore/feature_generators/ms2pip.py | 1 + 2 files changed, 57 insertions(+), 50 deletions(-) diff --git a/ms2rescore/feature_generators/ionmob.py b/ms2rescore/feature_generators/ionmob.py index 1358e0e6..183cd233 100644 --- a/ms2rescore/feature_generators/ionmob.py +++ b/ms2rescore/feature_generators/ionmob.py @@ -3,6 +3,8 @@ from itertools import chain import logging from pathlib import Path +import contextlib +import os from ms2rescore.feature_generators.base import FeatureGeneratorBase from psm_utils import PSMList @@ -54,7 +56,7 @@ def __init__( DEFAULT_MODELS_DICT[ionmob_model].__str__() ) except KeyError: - self.ionmob_model = tf.keras.models.load_model(ionmob_model) + self.ionmob_model = tf.keras.models.load_model(Path(ionmob_model).absolute().__str__()) self.reference_dataset = pd.read_parquet(reference_dataset) self.tokenizer = tokenizer_from_json(tokenizer) @@ -93,60 +95,64 @@ def add_features(self, psm_list: PSMList) -> None: logger.info( f"Running Ionmob for PSMs from run ({current_run}/{total_runs}): `{run}`..." 
) - psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values()))) - psm_list_run_df = psm_list_run.to_dataframe() - - # prepare dataframes for CCS prediction - psm_list_run_df["charge"] = [ - peptidoform.precursor_charge for peptidoform in psm_list_run_df["peptidoform"] - ] - psm_list_run_df = psm_list_run_df[ - psm_list_run_df["charge"] < 5 - ] # predictions do not go higher for ionmob - - psm_list_run_df["sequence-tokenized"] = psm_list_run_df.apply( - lambda x: self.tokenize_peptidoform(x["peptidoform"]), axis=1 - ) - psm_list_run_df = psm_list_run_df[ - psm_list_run_df.apply( - lambda x: self._is_valid_tokenized_sequence(x["sequence-tokenized"]), + with contextlib.redirect_stdout( + open(os.devnull, "w") + ) if not self._verbose else contextlib.nullcontext(): + psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values()))) + psm_list_run_df = psm_list_run.to_dataframe() + + # prepare dataframes for CCS prediction + psm_list_run_df["charge"] = [ + peptidoform.precursor_charge + for peptidoform in psm_list_run_df["peptidoform"] + ] + psm_list_run_df = psm_list_run_df[ + psm_list_run_df["charge"] < 5 + ] # predictions do not go higher for ionmob + + psm_list_run_df["sequence-tokenized"] = psm_list_run_df.apply( + lambda x: self.tokenize_peptidoform(x["peptidoform"]), axis=1 + ) + psm_list_run_df = psm_list_run_df[ + psm_list_run_df.apply( + lambda x: self._is_valid_tokenized_sequence(x["sequence-tokenized"]), + axis=1, + ) + ] + + psm_list_run_df["mz"] = psm_list_run_df.apply( + lambda x: calculate_mz(x["sequence-tokenized"], x["charge"]), axis=1 + ) # use precursor m/z from PSMs? + + psm_list_run_df["ccs_observed"] = psm_list_run_df.apply( + lambda x: reduced_mobility_to_ccs(x["ion_mobility"], x["mz"], x["charge"]), axis=1, ) - ] - - psm_list_run_df["mz"] = psm_list_run_df.apply( - lambda x: calculate_mz(x["sequence-tokenized"], x["charge"]), axis=1 - ) # use precursor m/z from PSMs? 
- - psm_list_run_df["ccs_observed"] = psm_list_run_df.apply( - lambda x: reduced_mobility_to_ccs(x["ion_mobility"], x["mz"], x["charge"]), - axis=1, - ) - # calibrate CCS values - shift_factor = self.calculate_ccs_shift(psm_list_run_df) - psm_list_run_df["ccs_observed"] = psm_list_run_df.apply( - lambda x: x["ccs_observed"] + shift_factor, axis=1 - ) - # predict CCS values - tf_ds = to_tf_dataset_inference( - psm_list_run_df["mz"], - psm_list_run_df["charge"], - psm_list_run_df["sequence-tokenized"], - self.tokenizer, - ) + # calibrate CCS values + shift_factor = self.calculate_ccs_shift(psm_list_run_df) + psm_list_run_df["ccs_observed"] = psm_list_run_df.apply( + lambda x: x["ccs_observed"] + shift_factor, axis=1 + ) + # predict CCS values + tf_ds = to_tf_dataset_inference( + psm_list_run_df["mz"], + psm_list_run_df["charge"], + psm_list_run_df["sequence-tokenized"], + self.tokenizer, + ) - psm_list_run_df["ccs_predicted"], _ = self.ionmob_model.predict(tf_ds) + psm_list_run_df["ccs_predicted"], _ = self.ionmob_model.predict(tf_ds) - # calculate CCS features - ccs_features = self._calculate_features(psm_list_run_df) + # calculate CCS features + ccs_features = self._calculate_features(psm_list_run_df) - # add CCS features to PSMs - for psm in psm_list_run: - try: - psm["rescoring_features"].update(ccs_features[psm.spectrum_id]) - except KeyError: - psm["rescoring_features"].update({}) - current_run += 1 + # add CCS features to PSMs + for psm in psm_list_run: + try: + psm["rescoring_features"].update(ccs_features[psm.spectrum_id]) + except KeyError: + psm["rescoring_features"].update({}) + current_run += 1 def _calculate_features(self, feature_df): """Get ccs features for PSMs.""" diff --git a/ms2rescore/feature_generators/ms2pip.py b/ms2rescore/feature_generators/ms2pip.py index d3829501..dde2dbc3 100644 --- a/ms2rescore/feature_generators/ms2pip.py +++ b/ms2rescore/feature_generators/ms2pip.py @@ -195,6 +195,7 @@ def add_features(self, psm_list: PSMList) -> None: 
processes=self.processes, ) self._calculate_features(psm_list_run, ms2pip_results) + current_run += 1 def _calculate_features( self, psm_list: PSMList, ms2pip_results: List[ProcessingResult] From 5b72f3ab5c8212b8915e4a483c454944d8016cab Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Thu, 14 Sep 2023 13:01:21 +0200 Subject: [PATCH 13/18] add verbose --- ms2rescore/feature_generators/ionmob.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ms2rescore/feature_generators/ionmob.py b/ms2rescore/feature_generators/ionmob.py index 183cd233..91ebf9a9 100644 --- a/ms2rescore/feature_generators/ionmob.py +++ b/ms2rescore/feature_generators/ionmob.py @@ -61,6 +61,8 @@ def __init__( self.reference_dataset = pd.read_parquet(reference_dataset) self.tokenizer = tokenizer_from_json(tokenizer) + self._verbose = logger.getEffectiveLevel() <= logging.DEBUG + @property def feature_names(self): return [ From b9ea72ed69347ca98cbd28a8cde87742495b016e Mon Sep 17 00:00:00 2001 From: RalfG Date: Thu, 14 Sep 2023 18:33:28 +0200 Subject: [PATCH 14/18] ionmob: Make optional dependency, add schema docs, add docstrings... 
--- .pre-commit-config.yaml | 6 + docs/source/config_schema.md | 5 + ms2rescore/core.py | 2 +- ms2rescore/feature_generators/__init__.py | 2 +- ms2rescore/feature_generators/base.py | 6 + ms2rescore/feature_generators/deeplc.py | 2 +- ms2rescore/feature_generators/ionmob.py | 161 ++++++++++++--------- ms2rescore/feature_generators/ms2pip.py | 2 +- ms2rescore/package_data/config_schema.json | 4 +- pyproject.toml | 21 +-- 10 files changed, 130 insertions(+), 81 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dae77c4d..a0ec6a48 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,6 +6,12 @@ repos: - id: end-of-file-fixer - id: trailing-whitespace + # - repo: https://github.com/pycqa/isort + # rev: 5.11.2 + # hooks: + # - id: isort + # name: isort (python) + - repo: https://github.com/psf/black rev: 22.10.0 hooks: diff --git a/docs/source/config_schema.md b/docs/source/config_schema.md index 97854027..61d53bc4 100644 --- a/docs/source/config_schema.md +++ b/docs/source/config_schema.md @@ -9,6 +9,7 @@ - **`ms2pip`**: Refer to *[#/definitions/ms2pip](#definitions/ms2pip)*. - **`deeplc`**: Refer to *[#/definitions/deeplc](#definitions/deeplc)*. - **`maxquant`**: Refer to *[#/definitions/maxquant](#definitions/maxquant)*. + - **`ionmob`**: Refer to *[#/definitions/ionmob](#definitions/ionmob)*. - **`rescoring_engine`** *(object)*: Rescoring engine to use and its configuration. Leave empty to skip rescoring and write features to file. Default: `{"mokapot": {}}`. - **`.*`**: Refer to *[#/definitions/rescoring_engine](#definitions/rescoring_engine)*. - **`percolator`**: Refer to *[#/definitions/percolator](#definitions/percolator)*. @@ -67,6 +68,10 @@ - *integer* - *number* - **`maxquant`** *(object)*: MaxQuant feature generator configuration. Can contain additional properties. Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*. 
+- **`ionmob`** *(object)*: Ion mobility feature generator configuration using Ionmob. Can contain additional properties. Refer to *[#/definitions/feature_generator](#definitions/feature_generator)*. + - **`ionmob_model`** *(string)*: Path to Ionmob model directory. Default: `"GRUPredictor"`. + - **`reference_dataset`** *(string)*: Path to Ionmob reference dataset file. Default: `"Meier_unimod.parquet"`. + - **`tokenizer`** *(string)*: Path to tokenizer json file. Default: `"tokenizer.json"`. - **`mokapot`** *(object)*: Mokapot rescoring engine configuration. Additional properties are passed to the Mokapot brew function. Can contain additional properties. Refer to *[#/definitions/rescoring_engine](#definitions/rescoring_engine)*. - **`write_weights`** *(boolean)*: Write Mokapot weights to a text file. Default: `false`. - **`write_txt`** *(boolean)*: Write Mokapot results to a text file. Default: `false`. diff --git a/ms2rescore/core.py b/ms2rescore/core.py index 5018573b..b2f736b1 100644 --- a/ms2rescore/core.py +++ b/ms2rescore/core.py @@ -84,7 +84,7 @@ def rescore(configuration: Dict, psm_list: PSMList = None) -> None: psm_list.add_fixed_modifications(config["fixed_modifications"]) psm_list.apply_fixed_modifications() - logger.debug("Applying `psm_id_pattern`...") + logger.debug("Applying 'psm_id_pattern'...") if config["psm_id_pattern"]: pattern = re.compile(config["psm_id_pattern"]) new_ids = [_match_psm_ids(old_id, pattern) for old_id in psm_list["spectrum_id"]] diff --git a/ms2rescore/feature_generators/__init__.py b/ms2rescore/feature_generators/__init__.py index ed6cfe32..9424448a 100644 --- a/ms2rescore/feature_generators/__init__.py +++ b/ms2rescore/feature_generators/__init__.py @@ -4,9 +4,9 @@ from ms2rescore.feature_generators.basic import BasicFeatureGenerator from ms2rescore.feature_generators.deeplc import DeepLCFeatureGenerator +from ms2rescore.feature_generators.ionmob import IonMobFeatureGenerator from ms2rescore.feature_generators.maxquant 
import MaxQuantFeatureGenerator from ms2rescore.feature_generators.ms2pip import MS2PIPFeatureGenerator -from ms2rescore.feature_generators.ionmob import IonMobFeatureGenerator FEATURE_GENERATORS = { "basic": BasicFeatureGenerator, diff --git a/ms2rescore/feature_generators/base.py b/ms2rescore/feature_generators/base.py index a8268306..d76c3be1 100644 --- a/ms2rescore/feature_generators/base.py +++ b/ms2rescore/feature_generators/base.py @@ -17,3 +17,9 @@ def feature_names(self): @abstractmethod def add_features(psm_list: PSMList): pass + + +class FeatureGeneratorException(Exception): + """Base class for exceptions raised by feature generators.""" + + pass diff --git a/ms2rescore/feature_generators/deeplc.py b/ms2rescore/feature_generators/deeplc.py index d11c4cc1..dcf84ea6 100644 --- a/ms2rescore/feature_generators/deeplc.py +++ b/ms2rescore/feature_generators/deeplc.py @@ -129,7 +129,7 @@ def add_features(self, psm_list: PSMList) -> None: # Run DeepLC for each spectrum file current_run = 1 - total_runs = len(list(chain.from_iterable([runs.keys() for runs in psm_dict.values()]))) + total_runs = sum(len(runs) for runs in psm_dict.values()) for runs in psm_dict.values(): # Reset DeepLC predictor for each collection of runs diff --git a/ms2rescore/feature_generators/ionmob.py b/ms2rescore/feature_generators/ionmob.py index 91ebf9a9..12f75f82 100644 --- a/ms2rescore/feature_generators/ionmob.py +++ b/ms2rescore/feature_generators/ionmob.py @@ -1,65 +1,96 @@ -import pandas as pd -import tensorflow as tf -from itertools import chain -import logging -from pathlib import Path import contextlib +import logging import os +from itertools import chain +from pathlib import Path +from typing import Dict, Optional -from ms2rescore.feature_generators.base import FeatureGeneratorBase -from psm_utils import PSMList -from ionmob.preprocess.data import to_tf_dataset_inference -from ionmob.utilities.utility import get_ccs_shift -from ionmob.utilities.tokenization import 
tokenizer_from_json -from ionmob.utilities.chemistry import VARIANT_DICT, reduced_mobility_to_ccs, calculate_mz -import ionmob +import pandas as pd +import tensorflow as tf +from psm_utils import Peptidoform, PSMList +from ms2rescore.feature_generators.base import FeatureGeneratorBase, FeatureGeneratorException -logger = logging.getLogger(__name__) +try: + from ionmob import __file__ as ionmob_file + from ionmob.preprocess.data import to_tf_dataset_inference + from ionmob.utilities.chemistry import VARIANT_DICT, calculate_mz, reduced_mobility_to_ccs + from ionmob.utilities.tokenization import tokenizer_from_json + from ionmob.utilities.utility import get_ccs_shift +except ImportError: + IONMOB_INSTALLED = False +else: + IONMOB_INSTALLED = True -ionmob_dir = Path(ionmob.__file__).parent +logger = logging.getLogger(__name__) -DEFAULT_MODELS_IONMOB = { - Path("pretrained_models/DeepTwoMerModel"), - Path("pretrained_models/GRUPredictor"), - Path("pretrained_models/SqrtModel"), -} -DEFAULT_MODELS_DICT = { - str(mod_path.stem): ionmob_dir.joinpath(mod_path) for mod_path in DEFAULT_MODELS_IONMOB -} -DEFAULT_TOKENIZER = ionmob_dir.joinpath("pretrained_models/tokenizers/tokenizer.json") -DEFAULT_REFERENCE_DATASET = ionmob_dir.joinpath("example_data/Tenzer_unimod.parquet") +if IONMOB_INSTALLED: + IONMOB_DIR = Path(ionmob_file).parent + DEFAULT_MODELS_IONMOB = { + Path("pretrained_models/DeepTwoMerModel"), + Path("pretrained_models/GRUPredictor"), + Path("pretrained_models/SqrtModel"), + } + DEFAULT_MODELS_DICT = { + mod_path.stem: IONMOB_DIR / mod_path for mod_path in DEFAULT_MODELS_IONMOB + } + DEFAULT_TOKENIZER = IONMOB_DIR / "pretrained_models/tokenizers/tokenizer.json" + DEFAULT_REFERENCE_DATASET = IONMOB_DIR / "example_data/Tenzer_unimod.parquet" class IonMobFeatureGenerator(FeatureGeneratorBase): - """Ionmob Collision Cross Section (CCS)-based feature generator.""" + """Ionmob collisional cross section (CCS)-based feature generator.""" def __init__( self, *args, 
ionmob_model: str = "GRUPredictor", - reference_dataset: str = DEFAULT_REFERENCE_DATASET, - tokenizer: str = DEFAULT_TOKENIZER, + reference_dataset: Optional[str] = None, + tokenizer: Optional[str] = None, **kwargs, ) -> None: """ - Ionmob Collision Cross Section (CCS)-based feature generator. + Ionmob collisional cross section (CCS)-based feature generator. Parameters ---------- - #TODO + *args + Additional arguments passed to the base class. + ionmob_model + Path to a trained Ionmob model or one of the default models (``DeepTwoMerModel``, + ``GRUPredictor``, or ``SqrtModel``). Default: ``GRUPredictor``. + reference_dataset + Path to a reference dataset for CCS shift calculation. Uses the default reference + dataset if not specified. + tokenizer + Path to a tokenizer or one of the default tokenizers. Uses the default tokenizer if + not specified. + **kwargs + Additional keyword arguments passed to the base class. """ super().__init__(*args, **kwargs) - try: - self.ionmob_model = tf.keras.models.load_model( - DEFAULT_MODELS_DICT[ionmob_model].__str__() + + # Check if Ionmob could be imported + if not IONMOB_INSTALLED: + raise ImportError( + "Ionmob not installed. Please install Ionmob to use this feature generator." ) - except KeyError: - self.ionmob_model = tf.keras.models.load_model(Path(ionmob_model).absolute().__str__()) - self.reference_dataset = pd.read_parquet(reference_dataset) - self.tokenizer = tokenizer_from_json(tokenizer) + # Get model from file or one of the default models + if Path(ionmob_model).is_file(): + self.ionmob_model = tf.keras.models.load_model(ionmob_model) + elif ionmob_model in DEFAULT_MODELS_DICT: + self.ionmob_model = tf.keras.models.load_model( + DEFAULT_MODELS_DICT[ionmob_model].as_posix() + ) + else: + raise IonmobException( + f"Invalid Ionmob model: {ionmob_model}. Should be path to a model file or one of " + f"the default models: {DEFAULT_MODELS_DICT.keys()}." 
+ ) + self.reference_dataset = pd.read_parquet(reference_dataset or DEFAULT_REFERENCE_DATASET) + self.tokenizer = tokenizer_from_json(tokenizer or DEFAULT_TOKENIZER) self._verbose = logger.getEffectiveLevel() <= logging.DEBUG @@ -74,7 +105,8 @@ def feature_names(self): ] @property - def allowed_mods(self): + def allowed_modifications(self): + """Return a list of modifications that are allowed in ionmob.""" return [token for aa_tokens in VARIANT_DICT.values() for token in aa_tokens] def add_features(self, psm_list: PSMList) -> None: @@ -90,7 +122,7 @@ def add_features(self, psm_list: PSMList) -> None: logger.info("Adding Ionmob-derived features to PSMs.") psm_dict = psm_list.get_psm_dict() current_run = 1 - total_runs = len(list(chain.from_iterable([runs.keys() for runs in psm_dict.values()]))) + total_runs = sum(len(runs) for runs in psm_dict.values()) for runs in psm_dict.values(): for run, psms in runs.items(): @@ -103,7 +135,7 @@ def add_features(self, psm_list: PSMList) -> None: psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values()))) psm_list_run_df = psm_list_run.to_dataframe() - # prepare dataframes for CCS prediction + # prepare data frames for CCS prediction psm_list_run_df["charge"] = [ peptidoform.precursor_charge for peptidoform in psm_list_run_df["peptidoform"] @@ -156,9 +188,8 @@ def add_features(self, psm_list: PSMList) -> None: psm["rescoring_features"].update({}) current_run += 1 - def _calculate_features(self, feature_df): - """Get ccs features for PSMs.""" - + def _calculate_features(self, feature_df: pd.DataFrame) -> Dict[str, Dict[str, float]]: + """Get CCS features for PSMs.""" ccs_features = {} for row in feature_df.itertuples(): ccs_features[row.spectrum_id] = { @@ -169,21 +200,11 @@ def _calculate_features(self, feature_df): "perc_ccs_error": ((abs(row.ccs_observed - row.ccs_predicted)) / row.ccs_observed) * 100, } - return ccs_features @staticmethod - def tokenize_peptidoform(peptidoform): - """ - Tokenize proforma 
sequence and add modifications. - - Args: - seq (str): Peptide sequence. - peprec_mod (str): Peptide modifications in the format "loc1|mod1|loc2|mod2|...". - - Returns: - list: A list of tokenized and modified peptide sequence. - """ + def tokenize_peptidoform(peptidoform: Peptidoform) -> list: + """Tokenize proforma sequence and add modifications.""" tokenized_seq = [] if peptidoform.properties["n_term"]: @@ -207,16 +228,15 @@ def tokenize_peptidoform(peptidoform): return tokenized_seq - def calculate_ccs_shift(self, psm_dataframe): + def calculate_ccs_shift(self, psm_dataframe: pd.DataFrame) -> float: """ Apply CCS shift to CCS values. - Args: - peprec (pandas.DataFrame): Modified and parsed Peprec data. - reference (str): Path to the reference data used for CCS shift calculation. + Parameters + ---------- + psm_dataframe + Dataframe with PSMs as returned by :py:meth:`psm_utils.PSMList.to_dataframe`. - Returns: - pandas.DataFrame: Peprec data with CCS values after applying the shift. """ df = psm_dataframe.copy() df.rename({"ccs_observed": "ccs"}, axis=1, inplace=True) @@ -238,14 +258,25 @@ def _is_valid_tokenized_sequence(self, tokenized_seq): """ Check if peptide sequence contains invalid tokens. - Args: - tokenized_seq (list): Tokenized peptide sequence. + Parameters + ---------- + tokenized_seq + Tokenized peptide sequence. + + Returns + ------- + bool + False if invalid tokens are present, True otherwise. - Returns: - bool: False if invalid tokens are present, True otherwise. 
""" for token in tokenized_seq: - if token not in self.allowed_mods: + if token not in self.allowed_modifications: logger.debug(f"Invalid modification found: {token}") return False return True + + +class IonmobException(FeatureGeneratorException): + """Exception raised by Ionmob feature generator.""" + + pass diff --git a/ms2rescore/feature_generators/ms2pip.py b/ms2rescore/feature_generators/ms2pip.py index dde2dbc3..1f9a668c 100644 --- a/ms2rescore/feature_generators/ms2pip.py +++ b/ms2rescore/feature_generators/ms2pip.py @@ -175,7 +175,7 @@ def add_features(self, psm_list: PSMList) -> None: logger.info("Adding MS²PIP-derived features to PSMs.") psm_dict = psm_list.get_psm_dict() current_run = 1 - total_runs = len(list(chain.from_iterable([runs.keys() for runs in psm_dict.values()]))) + total_runs = sum(len(runs) for runs in psm_dict.values()) for runs in psm_dict.values(): for run, psms in runs.items(): diff --git a/ms2rescore/package_data/config_schema.json b/ms2rescore/package_data/config_schema.json index 327069d7..7feef12d 100644 --- a/ms2rescore/package_data/config_schema.json +++ b/ms2rescore/package_data/config_schema.json @@ -29,7 +29,6 @@ }, "ionmob": { "$ref": "#/definitions/ionmob" - } }, "default": { @@ -213,8 +212,7 @@ "type": "string", "default": "Meier_unimod.parquet" }, - "tokenizer": - { + "tokenizer": { "description": "Path to tokenizer json file", "type": "string", "default": "tokenizer.json" diff --git a/pyproject.toml b/pyproject.toml index f72f3052..cbc5b9b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,27 +32,27 @@ classifiers = [ dynamic = ["version"] requires-python = ">=3.8" dependencies = [ - "numpy>=1.16.0,<2", - "pandas>=0.24.0,<3", # TODO: Check compatibility for v2 + "numpy>=1.16.0", + "pandas>=1.0", "rich>=12", - "pyteomics>=4.1.0,<5", - "lxml>=4.5,<5", - "ms2pip>=4.0.0-dev1,<5", + "pyteomics>=4.1.0", + "lxml>=4.5", + "ms2pip>=4.0.0-dev5", "click>=7", - "cascade-config>=0.4.0,<2", - "deeplc>=2.2.1", + 
"cascade-config>=0.4.0", + "deeplc>=2.2", "deeplcretrainer==0.1.17", # TODO: Release version pin "tomli>=2; python_version < '3.11'", - "psm_utils>=0.3", + "psm_utils>=0.4", "customtkinter>=5,<6", "mokapot>=0.9", "pydantic>=1.8.2,<2", # Fix compatibility with v2 in psm_utils "jinja2>=3", "plotly>=5", - "ionmob", ] [project.optional-dependencies] +ionmob = ["ionmob", "tensorflow"] dev = ["ruff", "black", "pytest", "pytest-cov", "pre-commit"] docs = [ "sphinx", @@ -81,6 +81,9 @@ ms2rescore-report = "ms2rescore.report.__main__:main" requires = ["flit_core >=3.2,<4"] build-backend = "flit_core.buildapi" +[tool.isort] +profile = "black" + [tool.black] line-length = 99 target-version = ['py38'] From 913f937aa3f8ad14290aee7f2dcffdde26968bde Mon Sep 17 00:00:00 2001 From: Ralf Gabriels Date: Fri, 15 Sep 2023 17:17:26 +0200 Subject: [PATCH 15/18] Undo edits to core.py to avoid merge conflicts --- ms2rescore/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ms2rescore/core.py b/ms2rescore/core.py index b2f736b1..5018573b 100644 --- a/ms2rescore/core.py +++ b/ms2rescore/core.py @@ -84,7 +84,7 @@ def rescore(configuration: Dict, psm_list: PSMList = None) -> None: psm_list.add_fixed_modifications(config["fixed_modifications"]) psm_list.apply_fixed_modifications() - logger.debug("Applying 'psm_id_pattern'...") + logger.debug("Applying `psm_id_pattern`...") if config["psm_id_pattern"]: pattern = re.compile(config["psm_id_pattern"]) new_ids = [_match_psm_ids(old_id, pattern) for old_id in psm_list["spectrum_id"]] From 9007588db2ef0d7e8c927ef630830d3bbce0f3b1 Mon Sep 17 00:00:00 2001 From: RalfG Date: Mon, 18 Sep 2023 13:41:51 +0200 Subject: [PATCH 16/18] Fix ms2pip version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cbc5b9b8..514b6e81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "rich>=12", "pyteomics>=4.1.0", "lxml>=4.5", - 
"ms2pip>=4.0.0-dev5", + "ms2pip>=4.0.0-dev4", "click>=7", "cascade-config>=0.4.0", "deeplc>=2.2", From 778edb810f41aab7ef30ff0a06f96f26162a6f4b Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Mon, 18 Sep 2023 14:26:00 +0200 Subject: [PATCH 17/18] updated gitignore and pytproject --- .gitignore | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index c3da1a3b..3a07570c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ steps.txt old_files/ prepare_pin_files.py *.jar -ms2rescore-v3.0.0.dev3.tar +*.tar # Ruff .ruff_cache/ diff --git a/pyproject.toml b/pyproject.toml index cbc5b9b8..514b6e81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "rich>=12", "pyteomics>=4.1.0", "lxml>=4.5", - "ms2pip>=4.0.0-dev5", + "ms2pip>=4.0.0-dev4", "click>=7", "cascade-config>=0.4.0", "deeplc>=2.2", From 96c708dd898204b637f707a29a00966beb0c119c Mon Sep 17 00:00:00 2001 From: ArthurDeclercq Date: Mon, 18 Sep 2023 14:30:48 +0200 Subject: [PATCH 18/18] updated ionmob version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 514b6e81..6952db3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ dependencies = [ ] [project.optional-dependencies] -ionmob = ["ionmob", "tensorflow"] +ionmob = ["ionmob>=0.2", "tensorflow"] dev = ["ruff", "black", "pytest", "pytest-cov", "pre-commit"] docs = [ "sphinx",