Skip to content

Commit

Permalink
Merge pull request #527 from vyzigold/nodeExporter-hostname
Browse files Browse the repository at this point in the history
[OSPRH-9848] enhance NodeExporter metrics
  • Loading branch information
openshift-merge-bot[bot] authored Nov 15, 2024
2 parents d01484b + 5130e45 commit 0ed9296
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 34 deletions.
85 changes: 65 additions & 20 deletions controllers/metricstorage_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,13 @@ type MetricStorageReconciler struct {
Cache cache.Cache
}

// ConnectionInfo holds information about connection to a compute node
type ConnectionInfo struct {
IP string
Hostname string
TLS bool
}

// GetLogger returns a logger object with a prefix of "conroller.name" and aditional controller context fields
func (r *MetricStorageReconciler) GetLogger(ctx context.Context) logr.Logger {
return log.FromContext(ctx).WithName("Controllers").WithName("MetricStorage")
Expand Down Expand Up @@ -506,7 +513,7 @@ func (r *MetricStorageReconciler) createServiceScrapeConfig(
log logr.Logger,
description string,
serviceName string,
targets []string,
targets interface{},
tlsEnabled bool,
) error {
scrapeConfig := &monv1alpha1.ScrapeConfig{
Expand Down Expand Up @@ -593,30 +600,31 @@ func (r *MetricStorageReconciler) createScrapeConfigs(
return ctrl.Result{}, err
}

// ScrapeConfigs for NodeExporters
endpointsNonTLS, endpointsTLS, err := getMetricExporterTargets(instance, helper, telemetryv1.DefaultNodeExporterPort)
connectionInfo, err := getComputeNodesConnectionInfo(instance, helper)
if err != nil {
Log.Info(fmt.Sprintf("Cannot get node exporter targets. Scrape configs not created. Error: %s", err))
Log.Info(fmt.Sprintf("Cannot get compute node connection info. Scrape configs not created. Error: %s", err))
}

// ScrapeConfigs for NodeExporters
neTargetsTLS, neTargetsNonTLS := getNodeExporterTargets(connectionInfo)
// ScrapeConfig for non-tls nodes
neServiceName := fmt.Sprintf("%s-node-exporter", telemetry.ServiceName)
err = r.createServiceScrapeConfig(ctx, instance, Log, "Node Exporter",
neServiceName, endpointsNonTLS, false)
neServiceName, neTargetsNonTLS, false)
if err != nil {
return ctrl.Result{}, err
}

// ScrapeConfig for tls nodes
neServiceName = fmt.Sprintf("%s-node-exporter-tls", telemetry.ServiceName)
err = r.createServiceScrapeConfig(ctx, instance, Log, "Node Exporter",
neServiceName, endpointsTLS, true)
neServiceName, neTargetsTLS, true)
if err != nil {
return ctrl.Result{}, err
}

// kepler scrape endpoints
_, keplerEndpoints, err := getMetricExporterTargets(instance, helper, telemetryv1.DefaultKeplerPort)
keplerEndpoints, _ := getKeplerTargets(connectionInfo)
if err != nil {
Log.Info(fmt.Sprintf("Cannot get Kepler targets. Scrape configs not created. Error: %s", err))
}
Expand All @@ -633,6 +641,37 @@ func (r *MetricStorageReconciler) createScrapeConfigs(
return ctrl.Result{}, err
}

func getNodeExporterTargets(nodes []ConnectionInfo) ([]metricstorage.LabeledTarget, []metricstorage.LabeledTarget) {
tls := []metricstorage.LabeledTarget{}
nonTLS := []metricstorage.LabeledTarget{}
for _, node := range nodes {
target := metricstorage.LabeledTarget{
IP: fmt.Sprintf("%s:%d", node.IP, telemetryv1.DefaultNodeExporterPort),
Hostname: node.Hostname,
}
if node.TLS {
tls = append(tls, target)
} else {
nonTLS = append(nonTLS, target)
}
}
return tls, nonTLS
}

func getKeplerTargets(nodes []ConnectionInfo) ([]string, []string) {
tls := []string{}
nonTLS := []string{}
for _, node := range nodes {
target := fmt.Sprintf("%s:%d", node.IP, telemetryv1.DefaultKeplerPort)
if node.TLS {
tls = append(tls, target)
} else {
nonTLS = append(nonTLS, target)
}
}
return tls, nonTLS
}

func (r *MetricStorageReconciler) createDashboardObjects(ctx context.Context, instance *telemetryv1.MetricStorage, eventHandler handler.EventHandler) (ctrl.Result, error) {
Log := r.GetLogger(ctx)
uiPluginObj := &obsui.UIPlugin{
Expand Down Expand Up @@ -795,26 +834,24 @@ func (r *MetricStorageReconciler) ensureWatches(
return err
}

func getMetricExporterTargets(
func getComputeNodesConnectionInfo(
instance *telemetryv1.MetricStorage,
helper *helper.Helper,
defaultMetricExporterPort int32,
) ([]string, []string, error) {
) ([]ConnectionInfo, error) {
ipSetList, err := getIPSetList(instance, helper)
if err != nil {
return []string{}, []string{}, err
return []ConnectionInfo{}, err
}
inventorySecretList, err := getInventorySecretList(instance, helper)
if err != nil {
return []string{}, []string{}, err
return []ConnectionInfo{}, err
}
var address string
addressesNonTLS := []string{}
addressesTLS := []string{}
connectionInfo := []ConnectionInfo{}
for _, secret := range inventorySecretList.Items {
inventory, err := ansible.UnmarshalYAML(secret.Data["inventory"])
if err != nil {
return []string{}, []string{}, err
return []ConnectionInfo{}, err
}
nodeSetGroup := inventory.Groups[secret.Labels["openstackdataplanenodeset"]]
containsTelemetry := false
Expand Down Expand Up @@ -842,20 +879,28 @@ func getMetricExporterTargets(
address, _ = getAddressFromAnsibleHost(&item)
} else {
// we were unable to find an IP or HostName for a node, so we do not go further
return addressesNonTLS, addressesTLS, fmt.Errorf("failed to find an IP or HostName for node %s", name)
return connectionInfo, fmt.Errorf("failed to find an IP or HostName for node %s", name)
}
if address == "" {
// we were unable to find an IP or HostName for a node, so we do not go further
return addressesNonTLS, addressesTLS, fmt.Errorf("failed to find an IP or HostName for node %s", name)
return connectionInfo, fmt.Errorf("failed to find an IP or HostName for node %s", name)
}
if TLSEnabled, ok := nodeSetGroup.Vars["edpm_tls_certs_enabled"].(bool); ok && TLSEnabled {
addressesTLS = append(addressesTLS, fmt.Sprintf("%s:%d", address, defaultMetricExporterPort))
connectionInfo = append(connectionInfo, ConnectionInfo{
IP: address,
Hostname: name,
TLS: true,
})
} else {
addressesNonTLS = append(addressesNonTLS, fmt.Sprintf("%s:%d", address, defaultMetricExporterPort))
connectionInfo = append(connectionInfo, ConnectionInfo{
IP: address,
Hostname: name,
TLS: false,
})
}
}
}
return addressesNonTLS, addressesTLS, nil
return connectionInfo, nil
}

func getIPSetList(instance *telemetryv1.MetricStorage, helper *helper.Helper) (*infranetworkv1.IPSetList, error) {
Expand Down
39 changes: 29 additions & 10 deletions pkg/metricstorage/scrape_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,16 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type LabeledTarget struct {
IP string
Hostname string
}

// ScrapeConfig creates a ScrapeConfig CR
func ScrapeConfig(
instance *telemetryv1.MetricStorage,
labels map[string]string,
targets []string,
targets interface{},
tlsEnabled bool,
) *monv1alpha1.ScrapeConfig {
var scrapeInterval monv1.Duration
Expand All @@ -46,10 +51,28 @@ func ScrapeConfig(
scrapeInterval = telemetryv1.DefaultScrapeInterval
}

sort.Strings(targets)
var convertedTargets []monv1alpha1.Target
for _, t := range targets {
convertedTargets = append(convertedTargets, monv1alpha1.Target(t))
var staticConfigs []monv1alpha1.StaticConfig
if ips, ok := targets.([]string); ok {
sort.Strings(ips)
var convertedTargets []monv1alpha1.Target
for _, t := range ips {
convertedTargets = append(convertedTargets, monv1alpha1.Target(t))
}
staticConfigs = append(staticConfigs, monv1alpha1.StaticConfig{
Targets: convertedTargets,
})
} else if labeledTargets, ok := targets.([]LabeledTarget); ok {
sort.Slice(labeledTargets, func(i, j int) bool {
return labeledTargets[i].IP < labeledTargets[j].IP
})
for _, t := range labeledTargets {
staticConfigs = append(staticConfigs, monv1alpha1.StaticConfig{
Targets: []monv1alpha1.Target{monv1alpha1.Target(t.IP)},
Labels: map[monv1.LabelName]string{
"hostname": t.Hostname,
},
})
}
}

scrapeConfig := &monv1alpha1.ScrapeConfig{
Expand Down Expand Up @@ -82,11 +105,7 @@ func ScrapeConfig(
},
},
ScrapeInterval: &scrapeInterval,
StaticConfigs: []monv1alpha1.StaticConfig{
{
Targets: convertedTargets,
},
},
StaticConfigs: staticConfigs,
},
}

Expand Down
15 changes: 13 additions & 2 deletions tests/kuttl/suites/metricstorage/tests/01-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,19 @@ metadata:
- kind: MetricStorage
name: telemetry-kuttl
spec:
staticConfigs:
- {}
scrapeInterval: 30s
---
apiVersion: monitoring.rhobs/v1alpha1
kind: ScrapeConfig
metadata:
labels:
service: metricStorage
name: telemetry-node-exporter-tls
ownerReferences:
- kind: MetricStorage
name: telemetry-kuttl
spec:
scrapeInterval: 30s
---
apiVersion: monitoring.rhobs/v1alpha1
kind: ScrapeConfig
Expand Down
15 changes: 13 additions & 2 deletions tests/kuttl/suites/metricstorage/tests/04-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,19 @@ metadata:
- kind: MetricStorage
name: telemetry-kuttl
spec:
staticConfigs:
- {}
scrapeInterval: 40s
---
apiVersion: monitoring.rhobs/v1alpha1
kind: ScrapeConfig
metadata:
labels:
service: metricStorage
name: telemetry-node-exporter-tls
ownerReferences:
- kind: MetricStorage
name: telemetry-kuttl
spec:
scrapeInterval: 40s
---
apiVersion: monitoring.rhobs/v1alpha1
kind: ScrapeConfig
Expand Down

0 comments on commit 0ed9296

Please sign in to comment.