From 31a1f91bca671acd25a239e060de12bfd926ca2b Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Tue, 19 Nov 2024 20:19:52 +0100 Subject: [PATCH] PostingsForLabelMatching: check for waiting writers There's still some contention on MemPostings.mtx, I checked goroutines from a profile at a spiking mutex wait time moment, I filtered by `MemPostings` and all I see is: - 4 goroutines waiting on `p.mtx.RLock()` in `LabelValues` - 203 goroutines waiting on `p.mtx.RLock()` in `Get` - 6 goroutines waiting on `p.mtx.Lock()` in `Add` - 4 goroutines waiting on first `p.mtx.RLock` in `PostingsForLabelMatching` - 2 goroutines in `for _, v := range vals` loop in `PostingsForLabelMatching` So, what I undersand is going on here: - The last two goroutines were able to enter the `PostingsForLabelMatching` second mutex, so they're creating ListPostings as there were no tomorrow (I'll send a separate PR to Prometheus to avoid creating one pointer for each one there) - The six goroutines waiting on `Add` are blocking the reset 200+ read goroutines. So, here we have 2 read goroutines blocking 200 goroutines (plus six read ones, if someone cares). What this change proposes is to check every few iterations (1024, because I expect this loop to go quite fast) whether there's a Lock() waiting. If there is one, we'll stop, unlock, and let it happen, because if we're being selfish, no other reader can get in here. Signed-off-by: Oleg Zaytsev --- tsdb/index/postings.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index 38aa3fb34..824e65a83 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -471,7 +471,17 @@ func (p *MemPostings) PostingsForLabelMatching(ctx context.Context, name string, its := make([]Postings, 0, len(vals)) p.mtx.RLock() e := p.m[name] + iter := 0 for _, v := range vals { + iter++ + if iter%1024 == 0 && p.writersWaiting() { + // There are writers waiting, so unlock the mutex, and use this opportunity to check the context. + p.mtx.RUnlock() + if ctx.Err() != nil { + return ErrPostings(ctx.Err()) + } + p.mtx.RLock() + } if refs, ok := e[v]; ok { // Some of the values may have been garbage-collected in the meantime this is fine, we'll just skip them. // If we didn't let the mutex go, we'd have these postings here, but they would be pointing nowhere @@ -486,6 +496,15 @@ func (p *MemPostings) PostingsForLabelMatching(ctx context.Context, name string, return Merge(ctx, its...) } +// writersWaiting can be called from an RLock()ed-goroutine to check whether there are other goroutines waiting on Lock() +func (p *MemPostings) writersWaiting() bool { + if !p.mtx.TryRLock() { + return true + } + p.mtx.RUnlock() + return false +} + // ExpandPostings returns the postings expanded as a slice. func ExpandPostings(p Postings) (res []storage.SeriesRef, err error) { for p.Next() {