diff --git a/.env b/.env index adc21721c8..ccd277c06e 100644 --- a/.env +++ b/.env @@ -28,7 +28,7 @@ OTEL_COLLECTOR_HOST=otelcol OTEL_COLLECTOR_PORT_GRPC=4317 OTEL_COLLECTOR_PORT_HTTP=4318 OTEL_COLLECTOR_CONFIG=./src/otelcollector/otelcol-config.yml -OTEL_COLLECTOR_CONFIG_EXTRAS=./src/otelcollector/otelcol-config-extras.yml +OTEL_COLLECTOR_CONFIG_EXTRAS=./instana/otelcollector/otelcol-config-extras.yml OTEL_EXPORTER_OTLP_ENDPOINT=http://${OTEL_COLLECTOR_HOST}:${OTEL_COLLECTOR_PORT_GRPC} PUBLIC_OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=http://localhost:8080/otlp-http/v1/traces diff --git a/instana/otelcollector/otelcol-config-extras.yml b/instana/otelcollector/otelcol-config-extras.yml new file mode 100644 index 0000000000..f8b23ffc6f --- /dev/null +++ b/instana/otelcollector/otelcol-config-extras.yml @@ -0,0 +1,39 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +# extra settings to be merged into OpenTelemetry Collector configuration +# do not delete this file + +## Example configuration for sending data to your own OTLP HTTP backend +## Note: the spanmetrics exporter must be included in the exporters array +## if overriding the traces pipeline. 
+## +processors: + resource: + attributes: + - key: service.instance.id + value: otel-demo + action: upsert + +# Replace the INSTANA_ENDPOINT, more information: +# https://www.ibm.com/docs/en/instana-observability/current?topic=opentelemetry-sending-data-instana +exporters: + otlp/instana: + endpoint: INSTANA_ENDPOINT + tls: + insecure: true + +service: + pipelines: + traces: + receivers: [otlp] + processors: [transform, batch, resource] + exporters: [otlp, debug, spanmetrics, otlp/instana] + metrics: + receivers: [otlp, spanmetrics] + processors: [batch, resource] + exporters: [otlphttp/prometheus, debug, otlp/instana] + logs: + receivers: [otlp] + processors: [batch, resource] + exporters: [opensearch, debug] diff --git a/src/checkoutservice/main.go b/src/checkoutservice/main.go index c67251aaab..bb82539616 100644 --- a/src/checkoutservice/main.go +++ b/src/checkoutservice/main.go @@ -155,6 +155,11 @@ func main() { } }() + meter := mp.Meter("checkoutservice") + if err := recordRuntimeMetrics(meter); err != nil { + log.Fatal(err) + } + err := runtime.Start(runtime.WithMinimumReadMemStatsInterval(time.Second)) if err != nil { log.Fatal(err) diff --git a/src/checkoutservice/runtime_metrics.go b/src/checkoutservice/runtime_metrics.go new file mode 100644 index 0000000000..b69bf38a02 --- /dev/null +++ b/src/checkoutservice/runtime_metrics.go @@ -0,0 +1,61 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 +package main + +import ( + "context" + "math" + "runtime" + + "go.opentelemetry.io/otel/metric" +) + +// recordRuntimeMetrics creates two observable instruments on meter, the counter +// "process.runtime.go.gc.pause_total_ms" and the up-down counter +// "process.runtime.go.mem.sys", and registers a single callback that reads +// runtime.MemStats and observes both. It returns the first error reported by +// instrument creation or callback registration, or nil. +func recordRuntimeMetrics(meter metric.Meter) error { + // Create metric instruments + + var ( + err error + + memSys metric.Int64ObservableUpDownCounter + pauseTotalMs metric.Int64ObservableCounter + ) + + if pauseTotalMs, err = meter.Int64ObservableCounter( + "process.runtime.go.gc.pause_total_ms", + metric.WithDescription("Cumulative milliseconds in GC stop-the-world pauses since the program started"), + ); err != nil { + return err 
+ } + + if memSys, err = meter.Int64ObservableUpDownCounter( + "process.runtime.go.mem.sys", + metric.WithUnit("By"), + metric.WithDescription("Bytes of memory obtained from the OS"), + ); err != nil { + return err + } + + // Record the runtime stats periodically + if _, err := meter.RegisterCallback( + func(ctx context.Context, o metric.Observer) error { + var memStats runtime.MemStats + runtime.ReadMemStats(&memStats) + + o.ObserveInt64(pauseTotalMs, clampUint64(memStats.PauseTotalNs)/1e6) // GC Pause in ms + o.ObserveInt64(memSys, clampUint64(memStats.Sys)) + return nil + }, + pauseTotalMs, memSys, + ); err != nil { + return err + } + + return nil +} + +// clampUint64 converts v to int64, returning math.MaxInt64 when v is larger +// than math.MaxInt64 so the conversion never wraps to a negative value. +func clampUint64(v uint64) int64 { + if v > math.MaxInt64 { + return math.MaxInt64 + } + return int64(v) +} diff --git a/src/paymentservice/charge.js b/src/paymentservice/charge.js index 7fcb7fbdaa..9f63620fd8 100644 --- a/src/paymentservice/charge.js +++ b/src/paymentservice/charge.js @@ -9,6 +9,7 @@ const { FlagdProvider} = require('@openfeature/flagd-provider'); const flagProvider = new FlagdProvider(); const logger = require('./logger'); +const runtimeMetrics = require('./runtime-metrics'); const tracer = trace.getTracer('paymentservice'); const meter = metrics.getMeter('paymentservice'); const transactionsCounter = meter.createCounter('app.payment.transactions') @@ -64,5 +65,8 @@ module.exports.charge = async request => { const { units, nanos, currencyCode } = request.amount; logger.info({transactionId, cardType, lastFourDigits, amount: { units, nanos, currencyCode }}, "Transaction complete."); transactionsCounter.add(1, {"app.payment.currency": currencyCode}) + + runtimeMetrics.setupRuntimeMetrics(); + return { transactionId } } diff --git a/src/paymentservice/opentelemetry.js b/src/paymentservice/opentelemetry.js index 0ba569cd30..623a309b1b 100644 --- a/src/paymentservice/opentelemetry.js +++ b/src/paymentservice/opentelemetry.js @@ -11,6 +11,7 @@ const {awsEc2Detector, awsEksDetector} = 
require('@opentelemetry/resource-detect const {containerDetector} = require('@opentelemetry/resource-detector-container') const {gcpDetector} = require('@opentelemetry/resource-detector-gcp') const {envDetector, hostDetector, osDetector, processDetector} = require('@opentelemetry/resources') +const {RuntimeNodeInstrumentation} = require('@opentelemetry/instrumentation-runtime-node') const sdk = new opentelemetry.NodeSDK({ traceExporter: new OTLPTraceExporter(), @@ -20,6 +21,9 @@ const sdk = new opentelemetry.NodeSDK({ '@opentelemetry/instrumentation-fs': { requireParentSpan: true, }, + }), + new RuntimeNodeInstrumentation({ + monitoringPrecision: 5000, }) ], metricReader: new PeriodicExportingMetricReader({ @@ -38,4 +42,4 @@ const sdk = new opentelemetry.NodeSDK({ ], }) -sdk.start(); +sdk.start(); \ No newline at end of file diff --git a/src/paymentservice/package-lock.json b/src/paymentservice/package-lock.json index d2719f2bb3..fad9651dbb 100644 --- a/src/paymentservice/package-lock.json +++ b/src/paymentservice/package-lock.json @@ -18,6 +18,7 @@ "@opentelemetry/core": "1.24.1", "@opentelemetry/exporter-metrics-otlp-grpc": "0.51.1", "@opentelemetry/exporter-trace-otlp-grpc": "0.51.1", + "@opentelemetry/instrumentation-runtime-node": "^0.10.0", "@opentelemetry/resource-detector-alibaba-cloud": "0.28.9", "@opentelemetry/resource-detector-aws": "1.5.0", "@opentelemetry/resource-detector-container": "0.3.9", @@ -909,6 +910,61 @@ "@opentelemetry/api": "^1.3.0" } }, + "node_modules/@opentelemetry/instrumentation-runtime-node": { + "version": "0.10.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-runtime-node/-/instrumentation-runtime-node-0.10.0.tgz", + "integrity": "sha512-hvwbi9iSqiclqSkRNFLW/3/v5drUPfD8V9zAmXZWZA68X0yygb2nPQlGce9Aba7p3l+Z4wGL8WcPtHDN2XzpoQ==", + "dependencies": { + "@opentelemetry/instrumentation": "^0.55.0" + }, + "engines": { + "node": ">=17.4.0" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + 
"node_modules/@opentelemetry/instrumentation-runtime-node/node_modules/@opentelemetry/api-logs": { + "version": "0.55.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/api-logs/-/api-logs-0.55.0.tgz", + "integrity": "sha512-3cpa+qI45VHYcA5c0bHM6VHo9gicv3p5mlLHNG3rLyjQU8b7e0st1rWtrUn3JbZ3DwwCfhKop4eQ9UuYlC6Pkg==", + "dependencies": { + "@opentelemetry/api": "^1.3.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@opentelemetry/instrumentation-runtime-node/node_modules/@opentelemetry/instrumentation": { + "version": "0.55.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation/-/instrumentation-0.55.0.tgz", + "integrity": "sha512-YDCMlaQRZkziLL3t6TONRgmmGxDx6MyQDXRD0dknkkgUZtOK5+8MWft1OXzmNu6XfBOdT12MKN5rz+jHUkafKQ==", + "dependencies": { + "@opentelemetry/api-logs": "0.55.0", + "@types/shimmer": "^1.2.0", + "import-in-the-middle": "^1.8.1", + "require-in-the-middle": "^7.1.1", + "semver": "^7.5.2", + "shimmer": "^1.2.1" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-runtime-node/node_modules/import-in-the-middle": { + "version": "1.11.2", + "resolved": "https://registry.npmjs.org/import-in-the-middle/-/import-in-the-middle-1.11.2.tgz", + "integrity": "sha512-gK6Rr6EykBcc6cVWRSBR5TWf8nn6hZMYSRYqCcHa0l0d1fPK7JSYo6+Mlmck76jIX9aL/IZ71c06U2VpFwl1zA==", + "dependencies": { + "acorn": "^8.8.2", + "acorn-import-attributes": "^1.9.5", + "cjs-module-lexer": "^1.2.2", + "module-details-from-path": "^1.0.3" + } + }, "node_modules/@opentelemetry/instrumentation-socket.io": { "version": "0.39.0", "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-socket.io/-/instrumentation-socket.io-0.39.0.tgz", @@ -1571,9 +1627,9 @@ } }, "node_modules/@types/shimmer": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/@types/shimmer/-/shimmer-1.0.5.tgz", - "integrity": 
"sha512-9Hp0ObzwwO57DpLFF0InUjUm/II8GmKAvzbefxQTihCb7KI6yc9yzf0nLc4mVdby5N4DRCgQM2wCup9KTieeww==" + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@types/shimmer/-/shimmer-1.2.0.tgz", + "integrity": "sha512-UE7oxhQLLd9gub6JKIAhDq06T0F6FnztwMNRvYgjeQSBeMc1ZG/tA47EwfduvkuQS8apbkM/lpLpWsaCeYsXVg==" }, "node_modules/@types/tedious": { "version": "4.0.14", diff --git a/src/paymentservice/package.json b/src/paymentservice/package.json index f51782201d..ccf79a81b0 100644 --- a/src/paymentservice/package.json +++ b/src/paymentservice/package.json @@ -16,18 +16,19 @@ "@grpc/proto-loader": "0.7.13", "@openfeature/flagd-provider": "0.13.0", "@openfeature/server-sdk": "1.14.0", + "@opentelemetry/api": "1.8.0", "@opentelemetry/auto-instrumentations-node": "0.46.1", "@opentelemetry/core": "1.24.1", - "@opentelemetry/resources": "1.24.1", - "@opentelemetry/api": "1.8.0", - "@opentelemetry/sdk-metrics": "1.24.1", - "@opentelemetry/exporter-trace-otlp-grpc": "0.51.1", "@opentelemetry/exporter-metrics-otlp-grpc": "0.51.1", - "@opentelemetry/sdk-node": "0.51.1", + "@opentelemetry/exporter-trace-otlp-grpc": "0.51.1", + "@opentelemetry/instrumentation-runtime-node": "^0.10.0", "@opentelemetry/resource-detector-alibaba-cloud": "0.28.9", "@opentelemetry/resource-detector-aws": "1.5.0", "@opentelemetry/resource-detector-container": "0.3.9", "@opentelemetry/resource-detector-gcp": "0.29.9", + "@opentelemetry/resources": "1.24.1", + "@opentelemetry/sdk-metrics": "1.24.1", + "@opentelemetry/sdk-node": "0.51.1", "grpc-js-health-check": "1.1.0", "pino": "8.16.1", "simple-card-validator": "1.1.0", diff --git a/src/paymentservice/runtime-metrics.js b/src/paymentservice/runtime-metrics.js new file mode 100644 index 0000000000..5629920a64 --- /dev/null +++ b/src/paymentservice/runtime-metrics.js @@ -0,0 +1,236 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 +const {metrics} = require('@opentelemetry/api'); +const process = require('process'); 
+const v8 = require('v8'); +const perf_hooks = require('perf_hooks'); +const { monitorEventLoopDelay } = require('perf_hooks'); + +const meter = metrics.getMeter('paymentservice'); +// new metrics +// GC Metrics tracking +let minorGcCount = 0; +let majorGcCount = 0; +let lastGcPause = 0; +let heapSizeAfterGc = 0; + +// Create observable gauges for all metrics +const gcPauseGauge = meter.createObservableGauge('nodejs.gc.gcPause', { + description: 'GC Pause in milliseconds', + unit: 'ms', +}); + +const activeHandlesGauge = meter.createObservableGauge('nodejs.activeHandles', { + description: 'Number of active handles', + unit: '{handles}', +}); + +const activeRequestsGauge = meter.createObservableGauge('nodejs.activeRequests', { + description: 'Number of active requests', + unit: '{requests}', +}); + +const minorGcsGauge = meter.createObservableGauge('nodejs.gc.minorGcs', { + description: 'Number of minor GCs', + unit: '{gcs}', +}); + +const majorGcsGauge = meter.createObservableGauge('nodejs.gc.majorGcs', { + description: 'Number of major GCs', + unit: '{gcs}', +}); + +const rssGauge = meter.createObservableGauge('nodejs.memory.rss', { + description: 'Resident Set Size', + unit: 'bytes', +}); + +const heapUsedGauge = meter.createObservableGauge('nodejs.memory.heapUsed', { + description: 'Heap Size Used', + unit: 'bytes', +}); + +const heapSizeAfterGcGauge = meter.createObservableGauge('nodejs.gc.usedHeapSizeAfterGc', { + description: 'Heap Size After GC', + unit: 'bytes', +}); + +// Event Loop Metrics +const eventLoopMaxGauge = meter.createObservableGauge('nodejs.libuv.max', { + description: 'Longest time spent in a single loop', + unit: 'ms', +}); + +const eventLoopSumGauge = meter.createObservableGauge('nodejs.libuv.sum', { + description: 'Total time spent in loop', + unit: 'ms', +}); + +const eventLoopLagGauge = meter.createObservableGauge('nodejs.libuv.lag', { + description: 'Event loop lag', + unit: 'ms', +}); + +const eventLoopCountGauge = 
meter.createObservableGauge('nodejs.libuv.num', { + description: 'Loops per second', + unit: '{loops}', +}); + +// Heap Spaces Metrics +const heapSpacesUsedGauge = meter.createObservableGauge('nodejs.heapSpaces.used', { + description: 'Heap Spaces Used', + unit: 'bytes', +}); + +const heapSpacesAvailableGauge = meter.createObservableGauge('nodejs.heapSpaces.available', { + description: 'Heap Spaces Available', + unit: 'bytes', +}); + +const heapSpacesCurrentGauge = meter.createObservableGauge('nodejs.heapSpaces.current', { + description: 'Heap Spaces Current', + unit: 'bytes', +}); + +const heapSpacesPhysicalGauge = meter.createObservableGauge('nodejs.heapSpaces.physical', { + description: 'Heap Spaces Physical', + unit: 'bytes', +}); + +// Set up performance observer for GC events +const obs = new perf_hooks.PerformanceObserver((list) => { + const entries = list.getEntries(); + entries.forEach((entry) => { + // Update GC metrics based on the type of GC + if (entry.kind === perf_hooks.constants.NODE_PERFORMANCE_GC_MAJOR) { + majorGcCount++; + } else { + minorGcCount++; + } + lastGcPause = entry.duration; + heapSizeAfterGc = process.memoryUsage().heapUsed; + }); +}); + +// Subscribe to GC events +obs.observe({ entryTypes: ['gc'], buffered: true }); + +const histogram = monitorEventLoopDelay({ resolution: 20 }); +histogram.enable(); + +let globalLastState = { + timestamp: process.hrtime.bigint(), + count: histogram.count, + sum: histogram.mean * histogram.count, + lastCollection: Date.now() +}; + +function eventLoopCollectMetrics() { + const attributes = { type: 'loops' }; + const now = Date.now(); + + const timeSinceLastCollection = now - globalLastState.lastCollection; + if (timeSinceLastCollection < 900) { + return; + } + + eventLoopMaxGauge.addCallback((observableResult) => { + const maxValue = Math.round(histogram.max / 1e6 * 1000) / 1000; + observableResult.observe(maxValue, attributes); + }); + + eventLoopSumGauge.addCallback((observableResult) => { + const 
currentState = { + timestamp: process.hrtime.bigint(), + count: histogram.count, + sum: histogram.mean * histogram.count + }; + + const deltaTime = Number(currentState.timestamp - globalLastState.timestamp) / 1e9; + const deltaSum = (currentState.sum - globalLastState.sum) / 1e6; + + if (deltaTime >= 0.9) { + const timePerSecond = deltaSum / deltaTime; + observableResult.observe(timePerSecond, attributes); + globalLastState.sum = currentState.sum; + globalLastState.timestamp = currentState.timestamp; + } + }); + + eventLoopLagGauge.addCallback((observableResult) => { + const lagValue = Math.round(histogram.mean / 1e6 * 1000) / 1000; + observableResult.observe(lagValue, attributes); + }); + + eventLoopCountGauge.addCallback((observableResult) => { + const currentCount = histogram.count; + const deltaTime = Number(process.hrtime.bigint() - globalLastState.timestamp) / 1e9; + const deltaCount = currentCount - globalLastState.count; + + if (deltaTime >= 0.9) { + const loopsPerSecond = Math.round(deltaCount / deltaTime); + observableResult.observe(loopsPerSecond, attributes); + globalLastState.count = currentCount; + } + }); + + globalLastState.lastCollection = now; + +} + +// Set up callbacks for all observable metrics +function setupRuntimeMetrics() { + // Memory metrics callback + const memoryCallback = (observableResult) => { + const memoryUsage = process.memoryUsage(); + observableResult.observe(memoryUsage.rss, { type: 'rss' }); + observableResult.observe(memoryUsage.heapUsed, { type: 'heapUsed' }); + }; + + // Active handles and requests callback + const handleCallback = (observableResult) => { + observableResult.observe(process._getActiveHandles().length, { type: 'handles' }); + observableResult.observe(process._getActiveRequests().length, { type: 'requests' }); + }; + + // GC metrics callback + const gcCallback = (observableResult) => { + observableResult.observe(minorGcCount, { type: 'minor' }); + observableResult.observe(majorGcCount, { type: 'major' }); + 
observableResult.observe(lastGcPause, { type: 'pause' }); + observableResult.observe(heapSizeAfterGc, { type: 'heapAfterGc' }); + }; + + // Heap spaces callback + const heapSpacesCallback = (observableResult) => { + const heapSpaces = v8.getHeapSpaceStatistics(); + heapSpaces.forEach(space => { + const attributes = { space: space.space_name }; + observableResult.observe(space.space_used_size, { ...attributes, metric: 'used' }); + observableResult.observe(space.space_available_size, { ...attributes, metric: 'available' }); + observableResult.observe(space.space_size, { ...attributes, metric: 'current' }); + observableResult.observe(space.physical_space_size, { ...attributes, metric: 'physical' }); + }); + }; + + // Register all callbacks + rssGauge.addCallback(memoryCallback); + heapUsedGauge.addCallback(memoryCallback); + activeHandlesGauge.addCallback(handleCallback); + activeRequestsGauge.addCallback(handleCallback); + minorGcsGauge.addCallback(gcCallback); + majorGcsGauge.addCallback(gcCallback); + gcPauseGauge.addCallback(gcCallback); + heapSizeAfterGcGauge.addCallback(gcCallback); + + heapSpacesUsedGauge.addCallback(heapSpacesCallback); + heapSpacesAvailableGauge.addCallback(heapSpacesCallback); + heapSpacesCurrentGauge.addCallback(heapSpacesCallback); + heapSpacesPhysicalGauge.addCallback(heapSpacesCallback); + + eventLoopCollectMetrics(); +} + +module.exports = { + setupRuntimeMetrics +};