Skip to content

Commit

Permalink
Fix performance problem in EncyclopeDiaLibrary.LibraryDetails (#3192)
Browse files Browse the repository at this point in the history
Fixed performance problem displaying library details for very large .elib files (not reported by anyone)

For 4GB .elib files, the query in "LibraryDetails" was taking an hour to figure out how many "best spectra" each file had.
That information is already available in the ElibSpectrumInfo objects, so it can be calculated almost instantly.
I hit the problem when trying to share a minimized Skyline document with a very large .elib file. New code had recently been added to BlibDb.MinimizeLibrary that uses "LibraryDetails".
  • Loading branch information
nickshulman authored Nov 5, 2024
1 parent be87df0 commit 86a9ba6
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 53 deletions.
74 changes: 24 additions & 50 deletions pwiz_tools/Skyline/Model/Lib/EncylopeDiaLibrary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -157,62 +157,36 @@ public override LibraryDetails LibraryDetails
{
get
{
var detailsByFileName = new Dictionary<string, SpectrumSourceFileDetails>();

try
return new LibraryDetails
{
lock (_pooledSqliteConnection)
{
using (var select = new SQLiteCommand(_pooledSqliteConnection.Connection))
{
// ReSharper disable LocalizableElement

// Query for the source files detail information.
select.CommandText = @"select one.SourceFile, BestSpectra, MatchedSpectra
from (
select SourceFile, count(*) as MatchedSpectra
from entries as s
group by SourceFile) as one
inner join (
select SourceFile, count(*) as BestSpectra
from entries as e
where Score = (select min(Score) from entries where e.PeptideModSeq = PeptideModSeq AND e.PrecursorCharge = PrecursorCharge)
group by SourceFile) as two
on one.SourceFile = two.SourceFile";

// ReSharper restore LocalizableElement
using (SQLiteDataReader reader = select.ExecuteReader())
{
while (reader.Read())
{
string filename = reader.GetString(0);
var fileDetails = new SpectrumSourceFileDetails(filename);
fileDetails.ScoreThresholds.Add(ScoreType.GenericQValue, null);
fileDetails.BestSpectrum = Convert.ToInt32(reader.GetValue(1));
fileDetails.MatchedSpectrum = Convert.ToInt32(reader.GetValue(2));
detailsByFileName.Add(filename, fileDetails);
}
}
}
}
}
catch (Exception)
DataFiles = EnumerateSpectrumSourceFileDetails().ToList()
// Consider: UniquePeptideCount, SpectrumCount
};
}
}

private IEnumerable<SpectrumSourceFileDetails> EnumerateSpectrumSourceFileDetails()
{
var bestSpectrumCounts = new int[LibraryFiles.Count];
var matchedSpectrumCounts = new int[LibraryFiles.Count];
foreach (var entry in _libraryEntries)
{
bestSpectrumCounts[entry.BestFileId]++;
foreach (var fileData in entry.FileDatas)
{
// Do nothing more. Simply return any details collected or minimum information.
matchedSpectrumCounts[fileData.Key]++;
}
}

return new LibraryDetails
for (int iFile = 0; iFile < LibraryFiles.Count; iFile++)
{
var details = new SpectrumSourceFileDetails(LibraryFiles[iFile])
{
DataFiles = _sourceFiles.Select(file =>
{
if (!detailsByFileName.TryGetValue(file, out var fileDetails))
{
fileDetails = new SpectrumSourceFileDetails(file);
fileDetails.ScoreThresholds.Add(ScoreType.GenericQValue, null);
}
return fileDetails;
})
BestSpectrum = bestSpectrumCounts[iFile],
MatchedSpectrum = matchedSpectrumCounts[iFile],
};
details.ScoreThresholds.Add(ScoreType.GenericQValue, null);
yield return details;
}
}

Expand Down
6 changes: 3 additions & 3 deletions pwiz_tools/Skyline/Model/Lib/Library.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2829,7 +2829,7 @@ public override string ToString()
public sealed class LibraryDetails
{
private readonly IList<LibraryLink> _libLinks;
private IEnumerable<SpectrumSourceFileDetails> _dataFiles;
private ImmutableList<SpectrumSourceFileDetails> _dataFiles = ImmutableList<SpectrumSourceFileDetails>.EMPTY;

public LibraryDetails()
{
Expand All @@ -2856,8 +2856,8 @@ public void AddLink(LibraryLink link)
public int TotalPsmCount { get; set; }
public IEnumerable<SpectrumSourceFileDetails> DataFiles
{
get { return _dataFiles ?? (_dataFiles = new List<SpectrumSourceFileDetails>()); }
set { _dataFiles = value; }
get { return _dataFiles; }
set { _dataFiles = ImmutableList.ValueOfOrEmpty(value); }
}

public IEnumerable<LibraryLink> LibLinks
Expand Down

0 comments on commit 86a9ba6

Please sign in to comment.