From 86a9ba60b51c7a8eb0dcb5451b421c45768f74dd Mon Sep 17 00:00:00 2001 From: nickshulman Date: Tue, 5 Nov 2024 09:46:41 -0800 Subject: [PATCH] Fix performance problem in EncyclopeDiaLibrary.LibraryDetails (#3192) Fixed performance problem displaying library details for very large .elib files (not reported by anyone) For 4GB .elib files, the query in "LibraryDetails" was taking an hour to figure out how many "best spectra" each file had. That information is already available in the ElibSpectrumInfo objects, so it can be calculated almost instantly. I hit the problem when trying to share minimize a Skyline document with a very large .elib file. New code had recently been added to BlibDb.MinimizeLibrary which uses "LibraryDetails". --- .../Skyline/Model/Lib/EncylopeDiaLibrary.cs | 74 ++++++------------- pwiz_tools/Skyline/Model/Lib/Library.cs | 6 +- 2 files changed, 27 insertions(+), 53 deletions(-) diff --git a/pwiz_tools/Skyline/Model/Lib/EncylopeDiaLibrary.cs b/pwiz_tools/Skyline/Model/Lib/EncylopeDiaLibrary.cs index a8858f1ecc..3559bcba32 100644 --- a/pwiz_tools/Skyline/Model/Lib/EncylopeDiaLibrary.cs +++ b/pwiz_tools/Skyline/Model/Lib/EncylopeDiaLibrary.cs @@ -157,62 +157,36 @@ public override LibraryDetails LibraryDetails { get { - var detailsByFileName = new Dictionary(); - - try + return new LibraryDetails { - lock (_pooledSqliteConnection) - { - using (var select = new SQLiteCommand(_pooledSqliteConnection.Connection)) - { - // ReSharper disable LocalizableElement - - // Query for the source files detail information. 
- select.CommandText = @"select one.SourceFile, BestSpectra, MatchedSpectra -from ( - select SourceFile, count(*) as MatchedSpectra - from entries as s - group by SourceFile) as one -inner join ( - select SourceFile, count(*) as BestSpectra - from entries as e - where Score = (select min(Score) from entries where e.PeptideModSeq = PeptideModSeq AND e.PrecursorCharge = PrecursorCharge) - group by SourceFile) as two -on one.SourceFile = two.SourceFile"; - - // ReSharper restore LocalizableElement - using (SQLiteDataReader reader = select.ExecuteReader()) - { - while (reader.Read()) - { - string filename = reader.GetString(0); - var fileDetails = new SpectrumSourceFileDetails(filename); - fileDetails.ScoreThresholds.Add(ScoreType.GenericQValue, null); - fileDetails.BestSpectrum = Convert.ToInt32(reader.GetValue(1)); - fileDetails.MatchedSpectrum = Convert.ToInt32(reader.GetValue(2)); - detailsByFileName.Add(filename, fileDetails); - } - } - } - } - } - catch (Exception) + DataFiles = EnumerateSpectrumSourceFileDetails().ToList() + // Consider: UniquePeptideCount, SpectrumCount + }; + } + } + + private IEnumerable EnumerateSpectrumSourceFileDetails() + { + var bestSpectrumCounts = new int[LibraryFiles.Count]; + var matchedSpectrumCounts = new int[LibraryFiles.Count]; + foreach (var entry in _libraryEntries) + { + bestSpectrumCounts[entry.BestFileId]++; + foreach (var fileData in entry.FileDatas) { - // Do nothing more. Simply return any details collected or minimum information. 
+ matchedSpectrumCounts[fileData.Key]++; } + } - return new LibraryDetails + for (int iFile = 0; iFile < LibraryFiles.Count; iFile++) + { + var details = new SpectrumSourceFileDetails(LibraryFiles[iFile]) { - DataFiles = _sourceFiles.Select(file => - { - if (!detailsByFileName.TryGetValue(file, out var fileDetails)) - { - fileDetails = new SpectrumSourceFileDetails(file); - fileDetails.ScoreThresholds.Add(ScoreType.GenericQValue, null); - } - return fileDetails; - }) + BestSpectrum = bestSpectrumCounts[iFile], + MatchedSpectrum = matchedSpectrumCounts[iFile], }; + details.ScoreThresholds.Add(ScoreType.GenericQValue, null); + yield return details; } } diff --git a/pwiz_tools/Skyline/Model/Lib/Library.cs b/pwiz_tools/Skyline/Model/Lib/Library.cs index 192f1305d9..99efe194c9 100644 --- a/pwiz_tools/Skyline/Model/Lib/Library.cs +++ b/pwiz_tools/Skyline/Model/Lib/Library.cs @@ -2829,7 +2829,7 @@ public override string ToString() public sealed class LibraryDetails { private readonly IList _libLinks; - private IEnumerable _dataFiles; + private ImmutableList _dataFiles = ImmutableList.EMPTY; public LibraryDetails() { @@ -2856,8 +2856,8 @@ public void AddLink(LibraryLink link) public int TotalPsmCount { get; set; } public IEnumerable DataFiles { - get { return _dataFiles ?? (_dataFiles = new List()); } - set { _dataFiles = value; } + get { return _dataFiles; } + set { _dataFiles = ImmutableList.ValueOfOrEmpty(value); } } public IEnumerable LibLinks