Skip to content

Commit

Permalink
- fixed MsFraggerSearchEngine to fill in percolator_qvalue from DIA searches properly, and if a PSM id in pepXML can't be found in the percolator TSV file, to put in percolator_qvalue=1
Browse files Browse the repository at this point in the history

- changed MsFraggerSearchEngine to use Alexey's recommended DDA search settings for low resolution DIA data (e.g. Stellar) when the workflow is DIA
* added tracking of Default (initial) values to AbstractDdaSearchEngine.Setting
  • Loading branch information
chambm committed Nov 6, 2024
1 parent cbf4a10 commit fe891d1
Show file tree
Hide file tree
Showing 8 changed files with 169 additions and 88 deletions.
15 changes: 9 additions & 6 deletions pwiz_tools/Skyline/Model/AbstractDdaSearchEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,23 +53,23 @@ public class Setting : IAuditLogObject
public Setting(string name, int defaultValue, int minValue = int.MinValue, int maxValue = int.MaxValue)
{
Name = name;
_value = defaultValue;
_value = DefaultValue = defaultValue;
MinValue = minValue;
MaxValue = maxValue;
}

public Setting(string name, double defaultValue, double minValue = double.MinValue, double maxValue = double.MaxValue)
{
Name = name;
_value = defaultValue;
_value = DefaultValue = defaultValue;
MinValue = minValue;
MaxValue = maxValue;
}

public Setting(string name, bool defaultValue)
{
Name = name;
_value = defaultValue;
_value = DefaultValue = defaultValue;
MinValue = false;
MaxValue = true;
}
Expand All @@ -78,16 +78,16 @@ public Setting(string name, string defaultValue = null, IEnumerable<string> vali
{
Name = name;
MinValue = string.Empty;
_value = defaultValue ?? string.Empty;
_value = DefaultValue = defaultValue ?? string.Empty;
ValidValues = validValues;
}

public Setting(Setting other)
public Setting(Setting other, object newValue = null)
{
Name = other.Name;
MinValue = other.MinValue;
MaxValue = other.MaxValue;
_value = other.Value;
_value = DefaultValue = newValue ?? other.Value;
ValidValues = other.ValidValues;
}

Expand All @@ -104,6 +104,9 @@ public object Value
set { _value = Validate(value); }
}

/// <summary>
/// The value this Setting was constructed with. The property is get-only, so it is
/// fixed once the constructor has run and is unaffected by later changes to Value.
/// </summary>
public object DefaultValue { get; }
/// <summary>
/// True when the current value is equal (via Equals) to <see cref="DefaultValue"/>,
/// i.e. the setting still holds the value it was constructed with.
/// </summary>
public bool IsDefault => Equals(DefaultValue, _value);

public object Validate(object value)
{
// incoming value must either be a string or value type must stay the same
Expand Down
83 changes: 77 additions & 6 deletions pwiz_tools/Skyline/Model/DdaSearch/MsFraggerSearchEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,14 @@ public enum DataType
dia_gpf = 2
}

private bool DataIsDIA => (DataType) AdditionalSettings[@"data_type"].Value != DataType.dda;
private string PepXmlSuffix => DataIsDIA ? @"_rank1.pepXML" : @".pepXML";
private readonly DataType _initialDataType;
private bool DataIsDIA => _initialDataType != DataType.dda;
private string PepXmlSuffix => (DataType) AdditionalSettings[@"data_type"].Value != DataType.dda ? @"_rank1.pepXML" : @".pepXML";

public MsFraggerSearchEngine(DataType dataType)
{
_initialDataType = dataType;

AdditionalSettings = new Dictionary<string, Setting>
{
{CHECK_SPECTRAL_FILES, new Setting(CHECK_SPECTRAL_FILES, 1, 0, 1)},
Expand Down Expand Up @@ -148,6 +151,23 @@ private void AddAdditionalSetting(List<string> settingNameList, Setting setting)
AdditionalSettings[setting.Name] = setting;
}

private Dictionary<string, Setting> _replacedAdditionalSettings = new Dictionary<string, Setting>();
/// <summary>
/// Swaps the named additional setting for a copy carrying <paramref name="newValue"/>,
/// but only while the setting still holds its default value. The setting that was
/// swapped out is remembered in _replacedAdditionalSettings so it can be put back later.
/// </summary>
/// <param name="settingName">Key of the setting in AdditionalSettings.</param>
/// <param name="newValue">Value (and new default) given to the replacement setting.</param>
private void ReplaceAdditionalSettingIfDefault(string settingName, object newValue)
{
    var currentSetting = AdditionalSettings[settingName];
    if (currentSetting.IsDefault)
    {
        // Stash the untouched setting, then install a copy whose value and default are newValue.
        _replacedAdditionalSettings[settingName] = currentSetting;
        AdditionalSettings[settingName] = new Setting(currentSetting, newValue);
    }
}

/// <summary>
/// Puts back the setting that ReplaceAdditionalSettingIfDefault swapped out, provided the
/// currently installed setting still holds its default value.
/// </summary>
/// <remarks>
/// Nothing happens when no replaced setting was recorded for <paramref name="settingName"/>.
/// That occurs when the setting was non-default at replacement time (so it was never
/// swapped out) and has since been reset to its default value; indexing the dictionary
/// directly in that case would throw a KeyNotFoundException.
/// </remarks>
/// <param name="settingName">Key of the setting in AdditionalSettings.</param>
private void RestoreAdditionalSettingIfDefault(string settingName)
{
    if (!AdditionalSettings[settingName].IsDefault)
        return;

    // Only restore when there is actually a stashed original for this name.
    Setting originalSetting;
    if (!_replacedAdditionalSettings.TryGetValue(settingName, out originalSetting))
        return;

    AdditionalSettings[settingName] = originalSetting;
    _replacedAdditionalSettings.Remove(settingName);
}

private static readonly string[] FRAGMENTATION_METHODS =
{
@"b,y",
Expand Down Expand Up @@ -465,7 +485,7 @@ private void GetPercolatorScores(string percolatorTsvFilepath, Dictionary<string
private void FixPercolatorPepXml(string cruxOutputFilepath, string finalOutputFilepath, MsDataFileUri spectrumFilename, Dictionary<string, double> qvalueByPsmId, IProgressMonitor monitor)
{
bool isBrukerSource = DataSourceUtil.GetSourceType(spectrumFilename.GetFilePath()) == DataSourceUtil.TYPE_BRUKER;
var lastPsmIdRegex = new Regex(@".* spectrum=""([^""]+?)"" .*",RegexOptions.Compiled);
var lastPsmIdRegex = new Regex(@".* assumed_charge=""(\d+)"" spectrum=""([^""]+?)\.\d+"" .*", RegexOptions.Compiled);

// This looks for an ampersand that is NOT followed by:
// - "amp;", "lt;", "gt;", "quot;", "apos;" (predefined XML entities)
Expand All @@ -486,14 +506,17 @@ private void FixPercolatorPepXml(string cruxOutputFilepath, string finalOutputFi
line = unescapedAmpersandRegex.Replace(line, @"&amp;");
}
if (line.Contains(@"<spectrum_query"))
lastPsmId = lastPsmIdRegex.Replace(line, "$1");
lastPsmId = lastPsmIdRegex.Replace(line, "$2.$1");
else if (line.Contains(@"<search_score name=""hyperscore"""))
{
if (qvalueByPsmId.ContainsKey(lastPsmId))
fixedPepXmlFile.WriteLine(@"<search_score name=""percolator_qvalue"" value=""{0}"" />", qvalueByPsmId[lastPsmId].ToString(CultureInfo.InvariantCulture));
// MCC: This happens when percolator's text tables drops a PSM that is in pepXML; I'm not sure why it happens though.
//else
// Console.WriteLine($"{lastPsmId} not found in percolator scores.");
else
{
fixedPepXmlFile.WriteLine(@"<search_score name=""percolator_qvalue"" value=""1"" />");
//Console.WriteLine($"{lastPsmId} not found in percolator scores.");
}
}
else if (line.Contains(@"</search_summary>"))
{
Expand Down Expand Up @@ -1000,9 +1023,57 @@ public override void SetMs2Analyzer(string ms2Analyzer)
// not used by MSFragger
}

/// <summary>
/// Reports whether a mass tolerance is wide enough to be treated as low resolution:
/// more than 50 when the unit is ppm, otherwise more than 0.05 (Da).
/// A null or zero tolerance is never low resolution.
/// </summary>
private static bool IsLowRes(MzTolerance mzTolerance)
{
    if (mzTolerance == null || mzTolerance.Value == 0)
        return false;

    if (mzTolerance.Unit == MzTolerance.Units.ppm)
        return mzTolerance.Value > 50;

    return mzTolerance.Value > 0.05 /* Da */;
}

/// <summary>
/// Stores the precursor mass tolerance. When the engine was created for DIA data and the
/// tolerance crosses the low-resolution threshold (see IsLowRes), a fixed group of
/// additional settings is switched to DDA-style values, or switched back when the
/// tolerance returns to high resolution. Settings the user has changed are left alone.
/// </summary>
public override void SetPrecursorMassTolerance(MzTolerance mzTolerance)
{
    bool previouslyLowRes = IsLowRes(_precursorMzTolerance);

    _precursorMzTolerance = mzTolerance;

    if (!DataIsDIA)
        return;

    // Use DDA settings for DIA if precursor tolerance is not high resolution:
    // From Alexey: running as DDA data, with +- 3 Da window, 0.2 Da, 100 peaks per spectrum, charge state 1 fragments for scoring only
    bool currentlyLowRes = IsLowRes(mzTolerance);
    if (previouslyLowRes == currentlyLowRes)
        return; // resolution class did not change, so nothing to swap

    // Setting name -> value to use while the tolerance is low resolution.
    // The order is the order in which settings are replaced or restored.
    var lowResOverrides = new[]
    {
        new KeyValuePair<string, object>(@"data_type", (int) DataType.dda),
        new KeyValuePair<string, object>(@"calibrate_mass", 0),
        new KeyValuePair<string, object>(@"deisotope", 0),
        new KeyValuePair<string, object>(@"deneutralloss", 0),
        new KeyValuePair<string, object>(@"intensity_transform", 0),
        //new KeyValuePair<string, object>(@"output_report_topN", 1),
        new KeyValuePair<string, object>(@"precursor_charge", @"1 4"),
        new KeyValuePair<string, object>(@"max_fragment_charge", 1),
        new KeyValuePair<string, object>(@"use_topN_peaks", 100),
        new KeyValuePair<string, object>(@"minimum_ratio", 0.01)
    };

    foreach (var lowResOverride in lowResOverrides)
    {
        if (currentlyLowRes)
            ReplaceAdditionalSettingIfDefault(lowResOverride.Key, lowResOverride.Value);
        else
            RestoreAdditionalSettingIfDefault(lowResOverride.Key);
    }
}

public override void SetCutoffScore(double cutoffScore)
Expand Down
19 changes: 13 additions & 6 deletions pwiz_tools/Skyline/TestPerf/DiaSearchTutorialTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ private class AnalysisValues
public string ProteinToSelect;
public string PeptideToSelect;

public MzTolerance PrecursorMzTolerance;
public MzTolerance FragmentMzTolerance;
public Dictionary<string, string> AdditionalSettings;
}

Expand All @@ -90,7 +92,7 @@ public void TestDiaSearchStellarTutorial()
IsolationSchemeHasGaps = true,
IsolationSchemeHasOverlaps = false,

FinalTargetCounts = new[] { 3801, 4730, 4730, 30353 },
FinalTargetCounts = new[] { 107, 145, 145, 910 },

ZipPath = "https://skyline.ms/tutorials/DiaSearchTutorial.zip",
DiaFiles = new[] {
Expand All @@ -99,9 +101,11 @@ public void TestDiaSearchStellarTutorial()
},
IsGpfData = true,

ProteinToSelect = "sp|P01591|IGJ_HUMAN",
PeptideToSelect = "ENISDPTSPLR",
ProteinToSelect = "sp|O15240|VGF_HUMAN",
PeptideToSelect = "LADLASDLLLQYLLQGGAR",

PrecursorMzTolerance = new MzTolerance(3.0),
FragmentMzTolerance = new MzTolerance(0.2),
AdditionalSettings = new Dictionary<string, string>
{
//{ "data_type", "2" } // set MSFragger to GPF mode
Expand Down Expand Up @@ -157,6 +161,9 @@ public void TestDiaSearchQeTutorial()

ProteinToSelect = "sp|P21333|FLNA_HUMAN",
PeptideToSelect = "IANLQTDLSDGLR",

PrecursorMzTolerance = new MzTolerance(10, MzTolerance.Units.ppm),
FragmentMzTolerance = new MzTolerance(10, MzTolerance.Units.ppm),
};

TestFilesZipPaths = new[]
Expand Down Expand Up @@ -391,8 +398,8 @@ private void TestMsFraggerSearch()

RunUI(() =>
{
importPeptideSearchDlg.SearchSettingsControl.PrecursorTolerance = new MzTolerance(10, MzTolerance.Units.ppm);
importPeptideSearchDlg.SearchSettingsControl.FragmentTolerance = new MzTolerance(10, MzTolerance.Units.ppm);
importPeptideSearchDlg.SearchSettingsControl.PrecursorTolerance = _analysisValues.PrecursorMzTolerance;
importPeptideSearchDlg.SearchSettingsControl.FragmentTolerance = _analysisValues.FragmentMzTolerance;
// Using the default q value of 0.01 (FDR 1%) is best for teaching and requires less explaining
// importPeptideSearchDlg.SearchSettingsControl.CutoffScore = 0.05;
importPeptideSearchDlg.SearchSettingsControl.SetAdditionalSetting("check_spectral_files", "0");
Expand All @@ -406,7 +413,7 @@ private void TestMsFraggerSearch()
});
PauseForScreenShot<ImportPeptideSearchDlg.DDASearchSettingsPage>("Import Peptide Search - Search Settings page", tutorialPage++);

WaitForConditionUI(() => MzTolerance.Units.ppm == importPeptideSearchDlg.SearchSettingsControl.FragmentTolerance.Unit);
WaitForConditionUI(() => _analysisValues.FragmentMzTolerance.Unit == importPeptideSearchDlg.SearchSettingsControl.FragmentTolerance.Unit);

// Run the search
SkylineWindow.BeginInvoke(new Action(() => Assert.IsTrue(importPeptideSearchDlg.ClickNextButton())));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ Import Peptide Search > Associate Proteins > Find minimal protein list is False
Import Peptide Search > Associate Proteins > Remove subset proteins is False
Import Peptide Search > Associate Proteins > Shared peptides option is "Assigned to first protein"
Import Peptide Search > Associate Proteins > Min peptides per protein is "1"
Import Peptide Search > Associate Proteins > Proteins mapped is "3988"
Import Peptide Search > Associate Proteins > Peptides mapped is "4720"
Import Peptide Search > Associate Proteins > Proteins unmapped is "16383"
Import Peptide Search > Associate Proteins > Protein targets is "3801"
Import Peptide Search > Associate Proteins > Peptide targets is "4730"
Import Peptide Search > Associate Proteins > Proteins mapped is "136"
Import Peptide Search > Associate Proteins > Peptides mapped is "143"
Import Peptide Search > Associate Proteins > Proteins unmapped is "20235"
Import Peptide Search > Associate Proteins > Protein targets is "107"
Import Peptide Search > Associate Proteins > Peptide targets is "145"
Import Peptide Search > DDA search settings > Search Engine is "MSFragger"
Import Peptide Search > DDA search settings > Precursor tolerance is "10" ppm
Import Peptide Search > DDA search settings > Fragment tolerance is "10" ppm
Import Peptide Search > DDA search settings > Precursor tolerance is "3" m/z
Import Peptide Search > DDA search settings > Fragment tolerance is "0.2" m/z
Import Peptide Search > DDA search settings > Max variable mods per peptide is "3"
Import Peptide Search > DDA search settings > Fragment ion types is "b,y"
Import Peptide Search > DDA search settings > Score type is "Max q-value"
Expand Down Expand Up @@ -95,17 +95,17 @@ Associate Proteins =
Remove subset proteins = False,
Shared peptides option = "Assigned to first protein",
Min peptides per protein = "1",
Proteins mapped = "3988",
Peptides mapped = "4720",
Proteins unmapped = "16383",
Protein targets = "3801",
Peptide targets = "4730"
Proteins mapped = "136",
Peptides mapped = "143",
Proteins unmapped = "20235",
Protein targets = "107",
Peptide targets = "145"
},
DDA search settings =
{
Search Engine = "MSFragger",
Precursor tolerance = "10" ppm,
Fragment tolerance = "10" ppm,
Precursor tolerance = "3" m/z,
Fragment tolerance = "0.2" m/z,
Max variable mods per peptide = "3",
Fragment ion types = "b,y",
Score type = "Max q-value",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ Import Peptide Search > Associate Proteins > Find minimal protein list is False
Import Peptide Search > Associate Proteins > Remove subset proteins is False
Import Peptide Search > Associate Proteins > Shared peptides option is "Assigned to first protein"
Import Peptide Search > Associate Proteins > Min peptides per protein is "1"
Import Peptide Search > Associate Proteins > Proteins mapped is "3988"
Import Peptide Search > Associate Proteins > Peptides mapped is "4720"
Import Peptide Search > Associate Proteins > Proteins unmapped is "16383"
Import Peptide Search > Associate Proteins > Protein targets is "3801"
Import Peptide Search > Associate Proteins > Peptide targets is "4730"
Import Peptide Search > Associate Proteins > Proteins mapped is "136"
Import Peptide Search > Associate Proteins > Peptides mapped is "143"
Import Peptide Search > Associate Proteins > Proteins unmapped is "20235"
Import Peptide Search > Associate Proteins > Protein targets is "107"
Import Peptide Search > Associate Proteins > Peptide targets is "145"
Import Peptide Search > DDA search settings > Search Engine is "MSFragger"
Import Peptide Search > DDA search settings > Precursor tolerance is "10" ppm
Import Peptide Search > DDA search settings > Fragment tolerance is "10" ppm
Import Peptide Search > DDA search settings > Precursor tolerance is "3" m/z
Import Peptide Search > DDA search settings > Fragment tolerance is "0,2" m/z
Import Peptide Search > DDA search settings > Max variable mods per peptide is "3"
Import Peptide Search > DDA search settings > Fragment ion types is "b,y"
Import Peptide Search > DDA search settings > Score type is "Max q-value"
Expand Down Expand Up @@ -95,17 +95,17 @@ Associate Proteins =
Remove subset proteins = False,
Shared peptides option = "Assigned to first protein",
Min peptides per protein = "1",
Proteins mapped = "3988",
Peptides mapped = "4720",
Proteins unmapped = "16383",
Protein targets = "3801",
Peptide targets = "4730"
Proteins mapped = "136",
Peptides mapped = "143",
Proteins unmapped = "20235",
Protein targets = "107",
Peptide targets = "145"
},
DDA search settings =
{
Search Engine = "MSFragger",
Precursor tolerance = "10" ppm,
Fragment tolerance = "10" ppm,
Precursor tolerance = "3" m/z,
Fragment tolerance = "0,2" m/z,
Max variable mods per peptide = "3",
Fragment ion types = "b,y",
Score type = "Max q-value",
Expand Down
Loading

0 comments on commit fe891d1

Please sign in to comment.