From 707fa609a5d23217f9166bb6199f0d87ea7f3d40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Melanie=20C=2E=20F=C3=B6ll?= Date: Sun, 1 Sep 2024 20:18:07 +0200 Subject: [PATCH 1/4] Update tutorial Update Uniprot ID tool to Version 0.5, which is installed on all main Galaxy servers and gives correct results --- .../proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.md b/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.md index f866dae339a37f..48902d59438ba9 100644 --- a/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.md +++ b/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.md @@ -461,11 +461,11 @@ In addition we retrieve for each Uniprot ID the corresponding protein names from > MSstats visualizations > -> 1. {% tool [UniProt ID mapping and retrieval](toolshed.g2.bx.psu.edu/repos/bgruening/uniprot_rest_interface/uniprot/0.2) %} with the following parameters: +> 1. {% tool [UniProt ID mapping and retrieval](toolshed.g2.bx.psu.edu/repos/bgruening/uniprot_rest_interface/uniprot/0.5) %} with the following parameters: > - {% icon param-file %} *"Input file with IDs"*: `metastasized join` (output of **Join** {% icon tool %}) > - *"ID column"*: `c1` > - *"Do you want to map IDs or retrieve data from UniProt"*: `Retrieve: request entries by uniprot accession using batch retrieval` -> 2. {% tool [UniProt ID mapping and retrieval](toolshed.g2.bx.psu.edu/repos/bgruening/uniprot_rest_interface/uniprot/0.2) %} with the following parameters: +> 2. 
{% tool [UniProt ID mapping and retrieval](toolshed.g2.bx.psu.edu/repos/bgruening/uniprot_rest_interface/uniprot/0.5) %} with the following parameters: > - {% icon param-file %} *"Input file with IDs"*: `rdeb join` (output of **Join** {% icon tool %}) > - *"ID column"*: `c1` > - *"Do you want to map IDs or retrieve data from UniProt"*: `Retrieve: request entries by uniprot accession using batch retrieval` From 6870c2e394eeee5b3b92f560f81a95fd4327dfd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Melanie=20C=2E=20F=C3=B6ll?= Date: Tue, 3 Sep 2024 10:14:51 +0200 Subject: [PATCH 2/4] Adding citations in bib file --- .../maxquant-msstats-dda-lfq/tutorial.bib | 56 +++++++++---------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.bib b/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.bib index 754c04639efb8e..017f34394116e9 100644 --- a/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.bib +++ b/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.bib @@ -1,30 +1,28 @@ - -# This is the bibliography file for your tutorial. -# -# To add bibliography (bibtex) entries here, follow these steps: -# 1) Find the DOI for the article you want to cite -# 2) Go to https://doi2bib.org and fill in the DOI -# 3) Copy the resulting bibtex entry into this file -# -# To cite the example below, in your tutorial.md file -# use {% Batut2018 %} - -@article{Batut2018, - doi = {10.1016/j.cels.2018.05.012}, - url = {https://doi.org/10.1016/j.cels.2018.05.012}, +@article{Fll2018, + title = {Identification of tissue damage, extracellular matrix remodeling and bacterial challenge as common mechanisms associated with high-risk cutaneous squamous cell carcinomas}, + volume = {66}, + ISSN = {0945-053X}, + url = {https://doi.org/10.1016/j.matbio.2017.11.004}, + DOI = {10.1016/j.matbio.2017.11.004}, + journal = {Matrix Biology}, + publisher = {Elsevier BV}, + author = {F{\"{o}}ll, Melanie C. 
and Fahrner, Matthias and Gretzmeier, Christine and Thoma, K{\"{a}}the and Biniossek, Martin L. and Kiritsi, Dimitra and Meiss, Frank and Schilling, Oliver and Nystr{\"{o}}m, Alexander and Kern, Johannes S.}, year = {2018}, - month = jun, - publisher = {Elsevier {BV}}, - volume = {6}, - number = {6}, - pages = {752--758.e1}, - author = {B{\'{e}}r{\'{e}}nice Batut and Saskia Hiltemann and Andrea Bagnacani and Dannon Baker and Vivek Bhardwaj and - Clemens Blank and Anthony Bretaudeau and Loraine Brillet-Gu{\'{e}}guen and Martin {\v{C}}ech and John Chilton - and Dave Clements and Olivia Doppelt-Azeroual and Anika Erxleben and Mallory Ann Freeberg and Simon Gladman and - Youri Hoogstrate and Hans-Rudolf Hotz and Torsten Houwaart and Pratik Jagtap and Delphine Larivi{\`{e}}re and - Gildas Le Corguill{\'{e}} and Thomas Manke and Fabien Mareuil and Fidel Ram{\'{i}}rez and Devon Ryan and - Florian Christoph Sigloch and Nicola Soranzo and Joachim Wolff and Pavankumar Videm and Markus Wolfien and - Aisanjiang Wubuli and Dilmurat Yusuf and James Taylor and Rolf Backofen and Anton Nekrutenko and Bj\"{o}rn Gr\"{u}ning}, - title = {Community-Driven Data Analysis Training for Biology}, - journal = {Cell Systems} -} \ No newline at end of file + month = mar, + pages = {1--21} +} + +@article{Choi2014, + title = {{MSstats}: an {R} package for statistical analysis of quantitative mass spectrometry-based proteomic experiments}, + volume = {30}, + ISSN = {1367-4811}, + url = {https://doi.org/10.1093/bioinformatics/btu305}, + DOI = {10.1093/bioinformatics/btu305}, + number = {17}, + journal = {Bioinformatics}, + publisher = {Oxford University Press (OUP)}, + author = {Choi, Meena and Chang, Ching-Yun and Clough, Timothy and Broudy, Daniel and Killeen, Trevor and MacLean, Brendan and Vitek, Olga}, + year = {2014}, + month = may, + pages = {2524–2526} +} From 4b51a6202ecc930f1c0995c0747b8db844078457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Melanie=20C=2E=20F=C3=B6ll?= Date: Tue, 3 Sep 2024 
10:19:49 +0200 Subject: [PATCH 3/4] Adding links to bib citations in tutorial --- .../tutorials/maxquant-msstats-dda-lfq/tutorial.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.md b/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.md index 48902d59438ba9..a07e564dcb261c 100644 --- a/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.md +++ b/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.md @@ -42,7 +42,7 @@ recordings: Modern mass spectrometry-based proteomics enables the identification and quantification of thousands of proteins. Therefore, quantitative mass spectrometry represents an indispensable technology for biological and clinical research. Statistical analyses are required for the unbiased answering of scientific questions and to uncover all important information in the proteomic data. Classical statistical approaches and methods from other omics technologies are not ideal because they do not take into account the speciality of mass spectrometry data that include several thousands of proteins but often only a few dozens of samples (referred to as ‘curse of dimensionality’) and stochastic data properties that reflect sample preparation and spectral acquisition (Choi 2014). In this training we will cover the full analysis workflow from label-free, data dependent acquisition (DDA) raw data to statistical results. We’ll use two popular quantitative proteomics software: MaxQuant and MSstats. MaxQuant allows protein identification and quantification for many different kinds of proteomics data (Cox and Mann 2008). In case you have no previous experience with MaxQuant, we recommend to go through the [MaxQuant beginners tutorial]({{site.baseurl}}/topics/proteomics/tutorials/maxquant-label-free/tutorial.html) before. 
MSstats provides statistical functionalities to find differentially abundant peptides or proteins from data dependent acquisition (DDA), data independent acquisition (DIA) or single reaction monitoring (SRM) proteomic experiments. -The training dataset consists of a skin cancer cohort of 19 patients, which is a subset of a [published study](https://doi.org/10.1016/j.matbio.2017.11.004). One fifth of all non melanoma skin cancers are cutaneous squamous cell carcinomas (cSCC) that mainly derive from exposure to ultraviolet light. Most cSCC have a good prognosis but the few metastasizing cSCC have dramatically increased mortality. Here, we compare these metastasizing cSCC to cSCC in patients with the genetic disease recessive dystrophic epidermolysis bullosa (RDEB). RDEB is a genetic skin blistering and extracellular matrix disease caused by collagen VII deficiency. To investigate molecular differences between these two aggressive cSCCs with different origin, we used global proteomic analysis of formalin-fixed paraffin-embedded human cSCC tissues. +The training dataset consists of a skin cancer cohort of 19 patients, which is a subset of a published study {% cite Fll2018 %}. One fifth of all non melanoma skin cancers are cutaneous squamous cell carcinomas (cSCC) that mainly derive from exposure to ultraviolet light. Most cSCC have a good prognosis but the few metastasizing cSCC have dramatically increased mortality. Here, we compare these metastasizing cSCC to cSCC in patients with the genetic disease recessive dystrophic epidermolysis bullosa (RDEB). RDEB is a genetic skin blistering and extracellular matrix disease caused by collagen VII deficiency. To investigate molecular differences between these two aggressive cSCCs with different origin, we used global proteomic analysis of formalin-fixed paraffin-embedded human cSCC tissues. 
> @@ -119,7 +119,7 @@ The annotation file, group comparison file and FASTA file for this training is d # MaxQuant analysis -The run time of **MaxQuant** {% icon tool %} depends on the number and size of the input files and on the chosen parameters. The run of the training datasets will take a few hours, but the training can be directly continued with the MaxQuant result files from Zenodo. We start the MaxQuant run with the default parameters, with a few adjustments. Protein level quantification parameters do not really matter here, because MSstats will use feature quantifications and perform protein summarization based on them. A quality control report is generated with the [PTXQC functionality](https://pubs.acs.org/doi/10.1021/acs.jproteome.5b00780) that is directly implemented in the MaxQuant Galaxy tool. To continue with statistical analysis in MSstats, the Protein Groups and the Evidence files are needed from MaxQuant. +The run time of **MaxQuant** {% icon tool %} depends on the number and size of the input files and on the chosen parameters. The run of the training datasets will take a few hours, but the training can be directly continued with the MaxQuant result files from Zenodo. We start the MaxQuant run with the default parameters, with a few adjustments. Protein level quantification parameters do not really matter here, because MSstats will use feature quantifications and perform protein summarization based on them. A quality control report is generated with the PTXQC functionality {% cite Bielow2015 %} that is directly implemented in the MaxQuant Galaxy tool. To continue with statistical analysis in MSstats, the Protein Groups and the Evidence files are needed from MaxQuant. 
> Optional: MaxQuant analysis > @@ -239,8 +239,8 @@ We use the modified MaxQuant protein groups and evidence files as input in MSsta ## More details on MSstats -MSstats is designed for statistical modelling of mass spectrometry based proteomic data [Choi 2014](https://doi.org/10.1093/bioinformatics/btu305 ). -Proteomic data analysis requires statistical approaches that reduce bias and inefficiencies and distinguish systematic variation from random artifacts [Käll and Vitek 2011]( https://doi.org/10.1371/journal.pcbi.1002277). +MSstats is designed for statistical modelling of mass spectrometry based proteomic data {% cite Choi2014 %}. +Proteomic data analysis requires statistical approaches that reduce bias and inefficiencies and distinguish systematic variation from random artifacts {% cite Kll2011 %}. MSstats is directly compatible with the output of several quantitative proteomics software. In addition to the results of the proteomics software an annotation file is needed as input. The annotation file describes the experimental design such as the conditions, biological and technical replicates. To be compatible with MaxQuant results, an additional column with the label type is needed, which only contains L (light) in DDA experiments. A wrong setup of the annotation file is the most common source of errors in MSstats, thus we collected more information in the box below to allow you to adjust the annotation file when analyzing your own experiments. 
From c887eb1134598909be9cb25531cadbc104e63182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Melanie=20C=2E=20F=C3=B6ll?= Date: Tue, 3 Sep 2024 10:23:12 +0200 Subject: [PATCH 4/4] adding more citations to bib file --- .../maxquant-msstats-dda-lfq/tutorial.bib | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.bib b/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.bib index 017f34394116e9..25bf74600631c4 100644 --- a/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.bib +++ b/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.bib @@ -26,3 +26,34 @@ @article{Choi2014 month = may, pages = {2524–2526} } + +@article{Bielow2015, + title = {Proteomics Quality Control: Quality Control Software for {MaxQuant} Results}, + volume = {15}, + ISSN = {1535-3907}, + url = {https://doi.org/10.1021/acs.jproteome.5b00780}, + DOI = {10.1021/acs.jproteome.5b00780}, + number = {3}, + journal = {Journal of Proteome Research}, + publisher = {American Chemical Society (ACS)}, + author = {Bielow, Chris and Mastrobuoni, Guido and Kempa, Stefan}, + year = {2015}, + month = dec, + pages = {777--787} +} + +@article{Kll2011, + title = {Computational Mass Spectrometry–Based Proteomics}, + volume = {7}, + ISSN = {1553-7358}, + url = {https://doi.org/10.1371/journal.pcbi.1002277}, + DOI = {10.1371/journal.pcbi.1002277}, + number = {12}, + journal = {PLoS Computational Biology}, + publisher = {Public Library of Science (PLoS)}, + author = {K{\"{a}}ll, Lukas and Vitek, Olga}, + editor = {Lewitter, Fran}, + year = {2011}, + month = dec, + pages = {e1002277} +}