-
Notifications
You must be signed in to change notification settings - Fork 442
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add views for metrics about pageserver requests (#9008)
The metrics include a histogram of how long we need to wait for a GetPage request, number of reconnects, and number of requests among other things. The metrics are not yet exported anywhere, but you can query them manually. Note: This does *not* bump the default version of the 'neon' extension. We will do that later, as a separate PR. The reason is that this allows us to roll back the compute image smoothly, if necessary. Once the image that includes the new extension .so file with the new functions has been rolled out, and we're confident that we don't need to roll back the image anymore, we can change default extension version and actually start using the new functions and views. This is what the view looks like: ``` postgres=# select * from neon_perf_counters ; metric | bucket_le | value ---------------------------------------+-----------+---------- getpage_wait_seconds_count | | 300 getpage_wait_seconds_sum | | 0.048506 getpage_wait_seconds_bucket | 2e-05 | 0 getpage_wait_seconds_bucket | 3e-05 | 0 getpage_wait_seconds_bucket | 6e-05 | 71 getpage_wait_seconds_bucket | 0.0001 | 124 getpage_wait_seconds_bucket | 0.0002 | 248 getpage_wait_seconds_bucket | 0.0003 | 279 getpage_wait_seconds_bucket | 0.0006 | 297 getpage_wait_seconds_bucket | 0.001 | 298 getpage_wait_seconds_bucket | 0.002 | 298 getpage_wait_seconds_bucket | 0.003 | 298 getpage_wait_seconds_bucket | 0.006 | 300 getpage_wait_seconds_bucket | 0.01 | 300 getpage_wait_seconds_bucket | 0.02 | 300 getpage_wait_seconds_bucket | 0.03 | 300 getpage_wait_seconds_bucket | 0.06 | 300 getpage_wait_seconds_bucket | 0.1 | 300 getpage_wait_seconds_bucket | 0.2 | 300 getpage_wait_seconds_bucket | 0.3 | 300 getpage_wait_seconds_bucket | 0.6 | 300 getpage_wait_seconds_bucket | 1 | 300 getpage_wait_seconds_bucket | 2 | 300 getpage_wait_seconds_bucket | 3 | 300 getpage_wait_seconds_bucket | 6 | 300 getpage_wait_seconds_bucket | 10 | 300 getpage_wait_seconds_bucket | 20 | 300 getpage_wait_seconds_bucket | 30 | 300 
getpage_wait_seconds_bucket | 60 | 300 getpage_wait_seconds_bucket | 100 | 300 getpage_wait_seconds_bucket | Infinity | 300 getpage_prefetch_requests_total | | 69 getpage_sync_requests_total | | 231 getpage_prefetch_misses_total | | 0 getpage_prefetch_discards_total | | 0 pageserver_requests_sent_total | | 323 pageserver_requests_disconnects_total | | 0 pageserver_send_flushes_total | | 323 file_cache_hits_total | | 0 (39 rows) ```
- Loading branch information
Showing
12 changed files
with
533 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
\echo Use "ALTER EXTENSION neon UPDATE TO '1.5'" to load this file. \quit | ||
|
||
|
||
-- Raw accessor for the per-backend performance counters, implemented in C.
-- Returns one record per (backend, metric); the column definition list is
-- supplied by the neon_backend_perf_counters view below.
CREATE FUNCTION get_backend_perf_counters()
RETURNS SETOF RECORD
AS 'MODULE_PATHNAME', 'neon_get_backend_perf_counters'
LANGUAGE C PARALLEL SAFE;
|
||
-- Like get_backend_perf_counters(), but the C implementation sums the
-- counters across all backends and returns one record per metric.
CREATE FUNCTION get_perf_counters()
RETURNS SETOF RECORD
AS 'MODULE_PATHNAME', 'neon_get_perf_counters'
LANGUAGE C PARALLEL SAFE;
|
||
-- Show various metrics, for each backend. Note that the values are not reset
-- when a backend exits. When a new backend starts with the backend ID, it will
-- continue accumulating the values from where the old backend left off. If you
-- are only interested in the changes from your own session, store the values
-- at the beginning of the session somewhere, and subtract them on subsequent
-- calls.
--
-- For histograms, 'bucket_le' is the upper bound of the histogram bucket;
-- it is NULL for non-histogram metrics.
CREATE VIEW neon_backend_perf_counters AS
SELECT P.procno, P.pid, P.metric, P.bucket_le, P.value
FROM get_backend_perf_counters() AS P (
procno integer,
pid integer,
metric text,
bucket_le float8,
value float8
);
|
||
-- Summary across all backends. (This could also be implemented with
-- an aggregate query over neon_backend_perf_counters view.)
--
-- Columns are as in neon_backend_perf_counters, minus the per-backend
-- procno/pid identification.
CREATE VIEW neon_perf_counters AS
SELECT P.metric, P.bucket_le, P.value
FROM get_perf_counters() AS P (
metric text,
bucket_le float8,
value float8
);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
-- Downgrade script: remove the perf-counter views and functions again.
-- The views must be dropped before the functions they are defined over.
DROP VIEW IF EXISTS neon_perf_counters;
DROP VIEW IF EXISTS neon_backend_perf_counters;
DROP FUNCTION IF EXISTS get_perf_counters();
DROP FUNCTION IF EXISTS get_backend_perf_counters();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,261 @@ | ||
/*------------------------------------------------------------------------- | ||
* | ||
* neon_perf_counters.c | ||
* Collect statistics about Neon I/O | ||
* | ||
* Each backend has its own set of counters in shared memory. | ||
* | ||
*------------------------------------------------------------------------- | ||
*/ | ||
#include "postgres.h" | ||
|
||
#include <math.h> | ||
|
||
#include "funcapi.h" | ||
#include "miscadmin.h" | ||
#include "storage/proc.h" | ||
#include "storage/shmem.h" | ||
#include "utils/builtins.h" | ||
|
||
#include "neon_perf_counters.h" | ||
#include "neon_pgversioncompat.h" | ||
|
||
neon_per_backend_counters *neon_per_backend_counters_shared; | ||
|
||
Size | ||
NeonPerfCountersShmemSize(void) | ||
{ | ||
Size size = 0; | ||
|
||
size = add_size(size, mul_size(MaxBackends, sizeof(neon_per_backend_counters))); | ||
|
||
return size; | ||
} | ||
|
||
bool | ||
NeonPerfCountersShmemInit(void) | ||
{ | ||
bool found; | ||
|
||
neon_per_backend_counters_shared = | ||
ShmemInitStruct("Neon perf counters", | ||
mul_size(MaxBackends, | ||
sizeof(neon_per_backend_counters)), | ||
&found); | ||
Assert(found == IsUnderPostmaster); | ||
if (!found) | ||
{ | ||
/* shared memory is initialized to zeros, so nothing to do here */ | ||
} | ||
} | ||
|
||
/* | ||
* Count a GetPage wait operation. | ||
*/ | ||
void | ||
inc_getpage_wait(uint64 latency_us) | ||
{ | ||
int lo = 0; | ||
int hi = NUM_GETPAGE_WAIT_BUCKETS - 1; | ||
|
||
/* Find the right bucket with binary search */ | ||
while (lo < hi) | ||
{ | ||
int mid = (lo + hi) / 2; | ||
|
||
if (latency_us < getpage_wait_bucket_thresholds[mid]) | ||
hi = mid; | ||
else | ||
lo = mid + 1; | ||
} | ||
MyNeonCounters->getpage_wait_us_bucket[lo]++; | ||
MyNeonCounters->getpage_wait_us_sum += latency_us; | ||
MyNeonCounters->getpage_wait_us_count++; | ||
} | ||
|
||
/* | ||
* Support functions for the views, neon_backend_perf_counters and | ||
* neon_perf_counters. | ||
*/ | ||
|
||
/*
 * In-memory representation of one metric row, before it is converted to
 * output Datums by metric_to_datums().
 */
typedef struct
{
	char	   *name;		/* metric name, e.g. "getpage_wait_seconds_sum" */
	bool		is_bucket;	/* true for histogram bucket rows; only then is
							 * bucket_le meaningful */
	double		bucket_le;	/* upper bound of the histogram bucket, in
							 * seconds (INFINITY for the last bucket) */
	double		value;		/* current counter value */
} metric_t;
|
||
static metric_t * | ||
neon_perf_counters_to_metrics(neon_per_backend_counters *counters) | ||
{ | ||
#define NUM_METRICS (2 + NUM_GETPAGE_WAIT_BUCKETS + 8) | ||
metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t)); | ||
uint64 bucket_accum; | ||
int i = 0; | ||
Datum getpage_wait_str; | ||
|
||
metrics[i].name = "getpage_wait_seconds_count"; | ||
metrics[i].is_bucket = false; | ||
metrics[i].value = (double) counters->getpage_wait_us_count; | ||
i++; | ||
metrics[i].name = "getpage_wait_seconds_sum"; | ||
metrics[i].is_bucket = false; | ||
metrics[i].value = ((double) counters->getpage_wait_us_sum) / 1000000.0; | ||
i++; | ||
|
||
bucket_accum = 0; | ||
for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++) | ||
{ | ||
uint64 threshold = getpage_wait_bucket_thresholds[bucketno]; | ||
|
||
bucket_accum += counters->getpage_wait_us_bucket[bucketno]; | ||
|
||
metrics[i].name = "getpage_wait_seconds_bucket"; | ||
metrics[i].is_bucket = true; | ||
metrics[i].bucket_le = (threshold == UINT64_MAX) ? INFINITY : ((double) threshold) / 1000000.0; | ||
metrics[i].value = (double) bucket_accum; | ||
i++; | ||
} | ||
metrics[i].name = "getpage_prefetch_requests_total"; | ||
metrics[i].is_bucket = false; | ||
metrics[i].value = (double) counters->getpage_prefetch_requests_total; | ||
i++; | ||
metrics[i].name = "getpage_sync_requests_total"; | ||
metrics[i].is_bucket = false; | ||
metrics[i].value = (double) counters->getpage_sync_requests_total; | ||
i++; | ||
metrics[i].name = "getpage_prefetch_misses_total"; | ||
metrics[i].is_bucket = false; | ||
metrics[i].value = (double) counters->getpage_prefetch_misses_total; | ||
i++; | ||
metrics[i].name = "getpage_prefetch_discards_total"; | ||
metrics[i].is_bucket = false; | ||
metrics[i].value = (double) counters->getpage_prefetch_discards_total; | ||
i++; | ||
metrics[i].name = "pageserver_requests_sent_total"; | ||
metrics[i].is_bucket = false; | ||
metrics[i].value = (double) counters->pageserver_requests_sent_total; | ||
i++; | ||
metrics[i].name = "pageserver_requests_disconnects_total"; | ||
metrics[i].is_bucket = false; | ||
metrics[i].value = (double) counters->pageserver_disconnects_total; | ||
i++; | ||
metrics[i].name = "pageserver_send_flushes_total"; | ||
metrics[i].is_bucket = false; | ||
metrics[i].value = (double) counters->pageserver_send_flushes_total; | ||
i++; | ||
metrics[i].name = "file_cache_hits_total"; | ||
metrics[i].is_bucket = false; | ||
metrics[i].value = (double) counters->file_cache_hits_total; | ||
i++; | ||
|
||
Assert(i == NUM_METRICS); | ||
|
||
/* NULL entry marks end of array */ | ||
metrics[i].name = NULL; | ||
metrics[i].value = 0; | ||
|
||
return metrics; | ||
} | ||
|
||
/* | ||
* Write metric to three output Datums | ||
*/ | ||
static void | ||
metric_to_datums(metric_t *m, Datum *values, bool *nulls) | ||
{ | ||
values[0] = CStringGetTextDatum(m->name); | ||
nulls[0] = false; | ||
if (m->is_bucket) | ||
{ | ||
values[1] = Float8GetDatum(m->bucket_le); | ||
nulls[1] = false; | ||
} | ||
else | ||
{ | ||
values[1] = (Datum) 0; | ||
nulls[1] = true; | ||
} | ||
values[2] = Float8GetDatum(m->value); | ||
nulls[2] = false; | ||
} | ||
|
||
PG_FUNCTION_INFO_V1(neon_get_backend_perf_counters);
/*
 * SQL-callable set-returning function backing the
 * neon_backend_perf_counters view.
 *
 * Emits one row per (backend slot, metric):
 * (procno, pid, metric, bucket_le, value).  Counters are read directly
 * from the shared-memory array; no locking is visible here, so values
 * from different backends may not form a perfectly consistent snapshot.
 */
Datum
neon_get_backend_perf_counters(PG_FUNCTION_ARGS)
{
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	Datum		values[5];
	bool		nulls[5];

	/* We put all the tuples into a tuplestore in one go. */
	InitMaterializedSRF(fcinfo, 0);

	for (int procno = 0; procno < MaxBackends; procno++)
	{
		PGPROC	   *proc = GetPGProcByNumber(procno);
		int			pid = proc->pid;
		neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];
		metric_t   *metrics = neon_perf_counters_to_metrics(counters);

		/* first two columns identify the backend; they are fixed per slot */
		values[0] = Int32GetDatum(procno);
		nulls[0] = false;
		values[1] = Int32GetDatum(pid);
		nulls[1] = false;

		/* remaining three columns vary per metric */
		for (int i = 0; metrics[i].name != NULL; i++)
		{
			metric_to_datums(&metrics[i], &values[2], &nulls[2]);
			tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
		}

		pfree(metrics);
	}

	return (Datum) 0;
}
|
||
PG_FUNCTION_INFO_V1(neon_get_perf_counters); | ||
Datum | ||
neon_get_perf_counters(PG_FUNCTION_ARGS) | ||
{ | ||
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; | ||
Datum values[3]; | ||
bool nulls[3]; | ||
Datum getpage_wait_str; | ||
neon_per_backend_counters totals = {0}; | ||
metric_t *metrics; | ||
|
||
/* We put all the tuples into a tuplestore in one go. */ | ||
InitMaterializedSRF(fcinfo, 0); | ||
|
||
/* Aggregate the counters across all backends */ | ||
for (int procno = 0; procno < MaxBackends; procno++) | ||
{ | ||
neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno]; | ||
|
||
totals.getpage_wait_us_count += counters->getpage_wait_us_count; | ||
totals.getpage_wait_us_sum += counters->getpage_wait_us_sum; | ||
for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++) | ||
totals.getpage_wait_us_bucket[bucketno] += counters->getpage_wait_us_bucket[bucketno]; | ||
totals.getpage_prefetch_requests_total += counters->getpage_prefetch_requests_total; | ||
totals.getpage_sync_requests_total += counters->getpage_sync_requests_total; | ||
totals.getpage_prefetch_misses_total += counters->getpage_prefetch_misses_total; | ||
totals.getpage_prefetch_discards_total += counters->getpage_prefetch_discards_total; | ||
totals.pageserver_requests_sent_total += counters->pageserver_requests_sent_total; | ||
totals.pageserver_disconnects_total += counters->pageserver_disconnects_total; | ||
totals.pageserver_send_flushes_total += counters->pageserver_send_flushes_total; | ||
totals.file_cache_hits_total += counters->file_cache_hits_total; | ||
} | ||
|
||
metrics = neon_perf_counters_to_metrics(&totals); | ||
for (int i = 0; metrics[i].name != NULL; i++) | ||
{ | ||
metric_to_datums(&metrics[i], &values[0], &nulls[0]); | ||
tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls); | ||
} | ||
pfree(metrics); | ||
|
||
return (Datum) 0; | ||
} |
Oops, something went wrong.
263dfba
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
4986 tests run: 4821 passed, 1 failed, 164 skipped (full report)
Failures on Postgres 14
test_replica_query_race
: release-x86-64Flaky tests (3)
Postgres 17
test_timeline_archive[4]
: release-arm64Postgres 16
test_delete_timeline_client_hangup
: release-x86-64Postgres 15
test_ondemand_wal_download_in_replication_slot_funcs
: release-arm64Test coverage report is not available
263dfba at 2024-09-23T21:51:15.245Z :recycle: