Merge pull request galaxyproject#5272 from galaxyproject/foellmelanie…

…-patch-2 Update maxquant and msstats tutorial
rlibouba · Sep 3, 2024 · 993b1d4 · 993b1d4
2 parents 0ad1cd1 + c887eb1
commit 993b1d4
Show file tree

Hide file tree

Showing 2 changed files with 63 additions and 34 deletions.
diff --git a/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.bib b/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.bib
@@ -1,30 +1,59 @@
+@article{Fll2018,
+  title = {Identification of tissue damage, extracellular matrix remodeling and bacterial challenge as common mechanisms associated with high-risk cutaneous squamous cell carcinomas},
+  volume = {66},
+  issn = {0945-053X},
+  url = {https://doi.org/10.1016/j.matbio.2017.11.004},
+  doi = {10.1016/j.matbio.2017.11.004},
+  journal = {Matrix Biology},
+  publisher = {Elsevier BV},
+  author = {F{\"o}ll, Melanie C. and Fahrner, Matthias and Gretzmeier, Christine and Thoma, K{\"a}the and Biniossek, Martin L. and Kiritsi, Dimitra and Meiss, Frank and Schilling, Oliver and Nystr{\"o}m, Alexander and Kern, Johannes S.},
+  year = {2018},
+  month = mar,
+  pages = {1--21}
+}
 
-# This is the bibliography file for your tutorial.
-#
-# To add bibliography (bibtex) entries here, follow these steps:
-#  1) Find the DOI for the article you want to cite
-#  2) Go to https://doi2bib.org and fill in the DOI
-#  3) Copy the resulting bibtex entry into this file
-#
-# To cite the example below, in your tutorial.md file
-# use {% Batut2018 %}
+@article{Choi2014,
+  title = {{MSstats}: an {R} package for statistical analysis of quantitative mass spectrometry-based proteomic experiments},
+  volume = {30},
+  issn = {1367-4811},
+  url = {https://doi.org/10.1093/bioinformatics/btu305},
+  doi = {10.1093/bioinformatics/btu305},
+  number = {17},
+  journal = {Bioinformatics},
+  publisher = {Oxford University Press (OUP)},
+  author = {Choi, Meena and Chang, Ching-Yun and Clough, Timothy and Broudy, Daniel and Killeen, Trevor and MacLean, Brendan and Vitek, Olga},
+  year = {2014},
+  month = may,
+  pages = {2524--2526}
+}
 
-@article{Batut2018,
-  doi = {10.1016/j.cels.2018.05.012},
-  url = {https://doi.org/10.1016/j.cels.2018.05.012},
-  year = {2018},
-  month = jun,
-  publisher = {Elsevier {BV}},
-  volume = {6},
-  number = {6},
-  pages = {752--758.e1},
-  author = {B{\'{e}}r{\'{e}}nice Batut and Saskia Hiltemann and Andrea Bagnacani and Dannon Baker and Vivek Bhardwaj and
-           Clemens Blank and Anthony Bretaudeau and Loraine Brillet-Gu{\'{e}}guen and Martin {\v{C}}ech and John Chilton
-           and Dave Clements and Olivia Doppelt-Azeroual and Anika Erxleben and Mallory Ann Freeberg and Simon Gladman and
-           Youri Hoogstrate and Hans-Rudolf Hotz and Torsten Houwaart and Pratik Jagtap and Delphine Larivi{\`{e}}re and
-           Gildas Le Corguill{\'{e}} and Thomas Manke and Fabien Mareuil and Fidel Ram{\'{i}}rez and Devon Ryan and
-           Florian Christoph Sigloch and Nicola Soranzo and Joachim Wolff and Pavankumar Videm and Markus Wolfien and
-           Aisanjiang Wubuli and Dilmurat Yusuf and James Taylor and Rolf Backofen and Anton Nekrutenko and Bj\"{o}rn Gr\"{u}ning},
-  title = {Community-Driven Data Analysis Training for Biology},
-  journal = {Cell Systems}
-}
+@article{Bielow2015,
+  title = {Proteomics Quality Control: Quality Control Software for {MaxQuant} Results},
+  volume = {15},
+  issn = {1535-3907},
+  url = {https://doi.org/10.1021/acs.jproteome.5b00780},
+  doi = {10.1021/acs.jproteome.5b00780},
+  number = {3},
+  journal = {Journal of Proteome Research},
+  publisher = {American Chemical Society (ACS)},
+  author = {Bielow, Chris and Mastrobuoni, Guido and Kempa, Stefan},
+  year = {2015},
+  month = dec,
+  pages = {777--787}
+}
+
+@article{Kll2011,
+  title = {Computational Mass Spectrometry--Based Proteomics},
+  volume = {7},
+  issn = {1553-7358},
+  url = {https://doi.org/10.1371/journal.pcbi.1002277},
+  doi = {10.1371/journal.pcbi.1002277},
+  number = {12},
+  journal = {PLoS Computational Biology},
+  publisher = {Public Library of Science (PLoS)},
+  author = {K{\"a}ll, Lukas and Vitek, Olga},
+  editor = {Lewitter, Fran},
+  year = {2011},
+  month = dec,
+  pages = {e1002277}
+}
diff --git a/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.md b/topics/proteomics/tutorials/maxquant-msstats-dda-lfq/tutorial.md
@@ -42,7 +42,7 @@ recordings:
 Modern mass spectrometry-based proteomics enables the identification and quantification of thousands of proteins. Therefore, quantitative mass spectrometry represents an indispensable technology for biological and clinical research. Statistical analyses are required for the unbiased answering of scientific questions and to uncover all important information in the proteomic data. Classical statistical approaches and methods from other omics technologies are not ideal because they do not take into account the speciality of mass spectrometry data that include several thousands of proteins but often only a few dozens of samples (referred to as ‘curse of dimensionality’) and stochastic data properties that reflect sample preparation and spectral acquisition (Choi 2014).
 
 In this training we will cover the full analysis workflow from label-free, data dependent acquisition (DDA) raw data to statistical results. We’ll use two popular quantitative proteomics software: MaxQuant and MSstats. MaxQuant allows protein identification and quantification for many different kinds of proteomics data (Cox and Mann 2008).  In case you have no previous experience with MaxQuant, we recommend to go through the [MaxQuant beginners tutorial]({{site.baseurl}}/topics/proteomics/tutorials/maxquant-label-free/tutorial.html) before. MSstats provides statistical functionalities to find differentially abundant peptides or proteins from data dependent acquisition (DDA), data independent acquisition (DIA) or single reaction monitoring (SRM) proteomic experiments.
-The training dataset consists of a skin cancer cohort of 19 patients, which is a subset of a [published study](https://doi.org/10.1016/j.matbio.2017.11.004). One fifth of all non melanoma skin cancers are cutaneous squamous cell carcinomas (cSCC) that mainly derive from exposure to ultraviolet light. Most cSCC have a good prognosis but the few metastasizing cSCC have dramatically increased mortality. Here, we compare these metastasizing cSCC to cSCC in patients with the genetic disease recessive dystrophic epidermolysis bullosa (RDEB). RDEB is a genetic skin blistering and extracellular matrix disease caused by collagen VII deficiency. To investigate molecular differences between these two aggressive cSCCs with different origin, we used global proteomic analysis of formalin-fixed paraffin-embedded human cSCC tissues.
+The training dataset consists of a skin cancer cohort of 19 patients, which is a subset of a published study {% cite Fll2018 %}. One fifth of all non-melanoma skin cancers are cutaneous squamous cell carcinomas (cSCC) that mainly derive from exposure to ultraviolet light. Most cSCC have a good prognosis but the few metastasizing cSCC have dramatically increased mortality. Here, we compare these metastasizing cSCC to cSCC in patients with the genetic disease recessive dystrophic epidermolysis bullosa (RDEB). RDEB is a genetic skin blistering and extracellular matrix disease caused by collagen VII deficiency. To investigate molecular differences between these two aggressive cSCCs with different origins, we used global proteomic analysis of formalin-fixed paraffin-embedded human cSCC tissues.
 
 
 > <agenda-title></agenda-title>
@@ -119,7 +119,7 @@ The annotation file, group comparison file and FASTA file for this training is d
 
 # MaxQuant analysis
 
-The run time of **MaxQuant** {% icon tool %} depends on the number and size of the input files and on the chosen parameters. The run of the training datasets will take a few hours, but the training can be directly continued with the MaxQuant result files from Zenodo. We start the MaxQuant run with the default parameters, with a few adjustments. Protein level quantification parameters do not really matter here, because MSstats will use feature quantifications and perform protein summarization based on them. A quality control report is generated with the [PTXQC functionality](https://pubs.acs.org/doi/10.1021/acs.jproteome.5b00780) that is directly implemented in the MaxQuant Galaxy tool. To continue with statistical analysis in MSstats, the Protein Groups and the Evidence files are needed from MaxQuant.
+The run time of **MaxQuant** {% icon tool %} depends on the number and size of the input files and on the chosen parameters. The run of the training datasets will take a few hours, but the training can be directly continued with the MaxQuant result files from Zenodo. We start the MaxQuant run with the default parameters, with a few adjustments. Protein level quantification parameters do not really matter here, because MSstats will use feature quantifications and perform protein summarization based on them. A quality control report is generated with the PTXQC functionality {% cite Bielow2015 %} that is directly implemented in the MaxQuant Galaxy tool. To continue with statistical analysis in MSstats, the Protein Groups and the Evidence files are needed from MaxQuant.
 
 > <hands-on-title>Optional: MaxQuant analysis</hands-on-title>
 >
@@ -239,8 +239,8 @@ We use the modified MaxQuant protein groups and evidence files as input in MSsta
 
 ## More details on MSstats
 
-MSstats  is designed for statistical modelling of mass spectrometry based proteomic data [Choi 2014](https://doi.org/10.1093/bioinformatics/btu305 ).
-Proteomic data analysis requires statistical approaches that reduce bias and inefficiencies and distinguish systematic variation from random artifacts [Käll and Vitek 2011]( https://doi.org/10.1371/journal.pcbi.1002277).
+MSstats is designed for statistical modelling of mass spectrometry-based proteomic data {% cite Choi2014 %}.
+Proteomic data analysis requires statistical approaches that reduce bias and inefficiencies and distinguish systematic variation from random artifacts {% cite Kll2011 %}.
 
 MSstats is directly compatible with the output of several quantitative proteomics software. In addition to the results of the proteomics software an annotation file is needed as input. The annotation file describes the experimental design such as the conditions, biological and technical replicates. To be compatible with MaxQuant results, an additional column with the label type is needed, which only contains L (light) in DDA experiments. A wrong setup of the annotation file is the most common source of errors in MSstats, thus we collected more information in the box below to allow you to adjust the annotation file when analyzing your own experiments. 
 
@@ -461,11 +461,11 @@ In addition we retrieve for each Uniprot ID the corresponding protein names from
 
 > <hands-on-title>MSstats visualizations</hands-on-title>
 >
-> 1. {% tool [UniProt ID mapping and retrieval](toolshed.g2.bx.psu.edu/repos/bgruening/uniprot_rest_interface/uniprot/0.2) %} with the following parameters:
+> 1. {% tool [UniProt ID mapping and retrieval](toolshed.g2.bx.psu.edu/repos/bgruening/uniprot_rest_interface/uniprot/0.5) %} with the following parameters:
 >    - {% icon param-file %} *"Input file with IDs"*: `metastasized join` (output of **Join** {% icon tool %})
 >    - *"ID column"*: `c1`
 >    - *"Do you want to map IDs or retrieve data from UniProt"*: `Retrieve: request entries by uniprot accession using batch retrieval`
-> 2. {% tool [UniProt ID mapping and retrieval](toolshed.g2.bx.psu.edu/repos/bgruening/uniprot_rest_interface/uniprot/0.2) %} with the following parameters:
+> 2. {% tool [UniProt ID mapping and retrieval](toolshed.g2.bx.psu.edu/repos/bgruening/uniprot_rest_interface/uniprot/0.5) %} with the following parameters:
 >    - {% icon param-file %} *"Input file with IDs"*: `rdeb join` (output of **Join** {% icon tool %})
 >    - *"ID column"*: `c1`
 >    - *"Do you want to map IDs or retrieve data from UniProt"*: `Retrieve: request entries by uniprot accession using batch retrieval`