From 5d0876fdc7fa4c6cca2733d0de514675e7cca73b Mon Sep 17 00:00:00 2001 From: freeleaps-gitops-bot Date: Tue, 23 Sep 2025 05:50:51 +0000 Subject: [PATCH 1/8] ci(bump): bump metrics image version for alpha to snapshot-3cba9e4 --- freeleaps/helm-pkg/metrics/values.alpha.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freeleaps/helm-pkg/metrics/values.alpha.yaml b/freeleaps/helm-pkg/metrics/values.alpha.yaml index b4ad5ee0..0a315dd6 100644 --- a/freeleaps/helm-pkg/metrics/values.alpha.yaml +++ b/freeleaps/helm-pkg/metrics/values.alpha.yaml @@ -15,7 +15,7 @@ metrics: registry: docker.io repository: null name: metrics - tag: snapshot-96f2f52 + tag: snapshot-3cba9e4 imagePullPolicy: IfNotPresent ports: - name: http From 1c79b10e6f021327999326a7433f8f589aaef0d9 Mon Sep 17 00:00:00 2001 From: freeleaps-gitops-bot Date: Tue, 23 Sep 2025 06:00:54 +0000 Subject: [PATCH 2/8] ci(bump): bump authentication image version for alpha to snapshot-3cba9e4 --- freeleaps/helm-pkg/authentication/values.alpha.yaml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/freeleaps/helm-pkg/authentication/values.alpha.yaml b/freeleaps/helm-pkg/authentication/values.alpha.yaml index 911554c0..53e761b3 100644 --- a/freeleaps/helm-pkg/authentication/values.alpha.yaml +++ b/freeleaps/helm-pkg/authentication/values.alpha.yaml @@ -1,7 +1,3 @@ -global: - registry: docker.io - repository: freeleaps - nodeSelector: {} dashboard: enabled: false logIngest: @@ -17,7 +13,7 @@ authentication: registry: docker.io repository: null name: authentication - tag: snapshot-96f2f52 + tag: snapshot-3cba9e4 imagePullPolicy: IfNotPresent ports: - name: http From ff12647613b0cca576acdcaaab76eaeef55756b2 Mon Sep 17 00:00:00 2001 From: freeleaps-gitops-bot Date: Tue, 23 Sep 2025 06:09:18 +0000 Subject: [PATCH 3/8] ci(bump): bump centralStorage image version for alpha to snapshot-3cba9e4 --- freeleaps/helm-pkg/centralStorage/values.alpha.yaml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/freeleaps/helm-pkg/centralStorage/values.alpha.yaml b/freeleaps/helm-pkg/centralStorage/values.alpha.yaml index ea5a2868..aad933eb 100644 --- a/freeleaps/helm-pkg/centralStorage/values.alpha.yaml +++ b/freeleaps/helm-pkg/centralStorage/values.alpha.yaml @@ -15,7 +15,7 @@ centralStorage: registry: docker.io repository: null name: central_storage - tag: snapshot-96f2f52 + tag: snapshot-3cba9e4 imagePullPolicy: IfNotPresent ports: - name: http From 8f7beabe4c0276354194e346a3c5b390a43c5bc1 Mon Sep 17 00:00:00 2001 From: icecheng Date: Tue, 23 Sep 2025 14:20:52 +0800 Subject: [PATCH 4/8] feat: add prometheusrule for metrics service --- ...r and Error Alter Integration Guideline.md | 32 +++++++++ .../templates/metrics/prometheusrule.yaml | 37 ++++++++++ freeleaps/helm-pkg/metrics/values.alpha.yaml | 27 +++++++ freeleaps/helm-pkg/metrics/values.prod.yaml | 26 +++++++ freeleaps/helm-pkg/metrics/values.yaml | 72 ++++++++++++++++++- .../prod/ci/freeleaps-service-hub/Jenkinsfile | 42 +++++------ 6 files changed, 212 insertions(+), 24 deletions(-) create mode 100644 docs/Service Monitor and Error Alter Integration Guideline.md create mode 100644 freeleaps/helm-pkg/metrics/templates/metrics/prometheusrule.yaml diff --git a/docs/Service Monitor and Error Alter Integration Guideline.md b/docs/Service Monitor and Error Alter Integration Guideline.md new file mode 100644 index 00000000..0d9369b9 --- /dev/null +++ b/docs/Service Monitor and Error Alter Integration Guideline.md @@ -0,0 +1,32 @@ +# Prometheus Alert Rule Config + +Add `prometheusrule.yaml` to `/templates`. +See the example below: +``` +{{- /* +Copyright Broadcom, Inc. All Rights Reserved. +SPDX-License-Identifier: APACHE-2.0 +*/}} + +{{- if and .Values.metrics.enabled .Values.metrics.prometheusRule.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ include "common.names.fullname" . }} + namespace: {{ default (include "common.names.namespace" .) 
.Values.metrics.prometheusRule.namespace | quote}} + labels: {{- include "common.labels.standard" ( dict "customLabels" .Values.commonLabels "context" $ ) | nindent 4 }} + {{- if .Values.metrics.prometheusRule.additionalLabels }} + {{- include "common.tplvalues.render" (dict "value" .Values.metrics.prometheusRule.additionalLabels "context" $) | nindent 4 }} + {{- end }} + {{- if .Values.commonAnnotations }} + annotations: {{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} + {{- end }} +spec: + groups: + {{- with .Values.metrics.prometheusRule.rules }} + - name: {{ template "common.names.name" $ }} + rules: {{- include "common.tplvalues.render" (dict "value" . "context" $) | nindent 8 }} + {{- end }} +{{- end }} + +``` \ No newline at end of file diff --git a/freeleaps/helm-pkg/metrics/templates/metrics/prometheusrule.yaml b/freeleaps/helm-pkg/metrics/templates/metrics/prometheusrule.yaml new file mode 100644 index 00000000..c9877e0e --- /dev/null +++ b/freeleaps/helm-pkg/metrics/templates/metrics/prometheusrule.yaml @@ -0,0 +1,37 @@ +{{- /* +Copyright Broadcom, Inc. All Rights Reserved. +SPDX-License-Identifier: APACHE-2.0 +*/}} + +{{- if .Values.metrics.prometheusRule.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ .Values.metrics.prometheusRule.name }} + namespace: {{ .Values.metrics.prometheusRule.namespace | quote }} + {{- with .Values.metrics.prometheusRule.labels }} + labels: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + groups: + {{- with .Values.metrics.prometheusRule.rules }} + - name: {{ $.Values.metrics.prometheusRule.name }} + rules: + {{- range . 
}} + - alert: {{ .alert }} + expr: {{ .expr | quote }} + {{- if .for }} + for: {{ .for }} + {{- end }} + {{- if .labels }} + labels: + {{- toYaml .labels | nindent 12 }} + {{- end }} + {{- if .annotations }} + annotations: + {{- toYaml .annotations | nindent 12 }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/freeleaps/helm-pkg/metrics/values.alpha.yaml b/freeleaps/helm-pkg/metrics/values.alpha.yaml index 6972690d..709beac5 100644 --- a/freeleaps/helm-pkg/metrics/values.alpha.yaml +++ b/freeleaps/helm-pkg/metrics/values.alpha.yaml @@ -81,3 +81,30 @@ metrics: controlledResources: - cpu - memory + prometheusRule: + name: freepeals-alpha-metrics + enabled: false + namespace: "freeleaps-monitoring-system" + rules: + - alert: FreeleapsMetricsServiceDown + expr: up{job="metrics-service"} == 0 + for: 1m + labels: + severity: critical + service: metrics-service + annotations: + summary: "Freeleaps Metrics service is down (instance {{ $labels.instance }})" + description: "Freeleaps Metrics service has been down for more than 1 minutes." + runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7" + + - alert: FreeleapsMetricsServiceHighErrorRate + expr: rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1 + for: 5m + labels: + severity: warning + service: metrics-service + annotations: + summary: "High error rate in freeleaps metrics service (instance {{ $labels.instance }})" + description: "Freeleaps Metrics service error rate is {{ $value }} errors per second." 
+ runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7" + diff --git a/freeleaps/helm-pkg/metrics/values.prod.yaml b/freeleaps/helm-pkg/metrics/values.prod.yaml index 9c036f1f..618c52a2 100644 --- a/freeleaps/helm-pkg/metrics/values.prod.yaml +++ b/freeleaps/helm-pkg/metrics/values.prod.yaml @@ -81,3 +81,29 @@ metrics: controlledResources: - cpu - memory + prometheusRule: + name: freepeals-prod-metrics + enabled: true + namespace: "freeleaps-monitoring-system" + rules: + - alert: FreeleapsMetricsServiceDown + expr: up{job="metrics-service"} == 0 + for: 1m + labels: + severity: critical + service: metrics-service + annotations: + summary: "Freeleaps Metrics service is down (instance {{ $labels.instance }})" + description: "Freeleaps Metrics service has been down for more than 1 minutes." + runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7" + + - alert: FreeleapsMetricsServiceHighErrorRate + expr: rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1 + for: 5m + labels: + severity: warning + service: metrics-service + annotations: + summary: "High error rate in freeleaps metrics service (instance {{ $labels.instance }})" + description: "Freeleaps Metrics service error rate is {{ $value }} errors per second." 
+ runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7" \ No newline at end of file diff --git a/freeleaps/helm-pkg/metrics/values.yaml b/freeleaps/helm-pkg/metrics/values.yaml index 2aefffe1..5a1d5c8f 100644 --- a/freeleaps/helm-pkg/metrics/values.yaml +++ b/freeleaps/helm-pkg/metrics/values.yaml @@ -55,12 +55,12 @@ metrics: port: 8009 targetPort: 8009 serviceMonitor: - enabled: false + enabled: true labels: release: kube-prometheus-stack namespace: freeleaps-monitoring-system - internal: 30s - scrapeTimeout: '' + interval: 30s + scrapeTimeout: 10s configs: starrocksHost: "" starrocksPort: 8009 @@ -80,3 +80,69 @@ metrics: controlledResources: - cpu - memory + prometheusRule: + name: freepeals-metrics + enabled: true + namespace: "freeleaps-monitoring-system" + rules: + - alert: FreeleapsMetricsServiceDown + expr: up{job="metrics-service"} == 0 + for: 1m + labels: + severity: critical + service: metrics-service + annotations: + summary: "Freeleaps Metrics service is down (instance {{ $labels.instance }})" + description: "Freeleaps Metrics service has been down for more than 1 minutes." + runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7" + + - alert: FreeleapsMetricsServiceHighErrorRate + expr: rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1 + for: 5m + labels: + severity: warning + service: metrics-service + annotations: + summary: "High error rate in freeleaps metrics service (instance {{ $labels.instance }})" + description: "Freeleaps Metrics service error rate is {{ $value }} errors per second." 
+ runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7" + + # - alert: MetricsServiceHighLatency + # expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{job="metrics-service"}[5m])) > 1 + # for: 5m + # labels: + # severity: warning + # service: metrics-service + # annotations: + # summary: "High latency in metrics service (instance {{ $labels.instance }})" + # description: "95th percentile latency is {{ $value }} seconds." + + # - alert: MetricsServiceHighMemoryUsage + # expr: (process_resident_memory_bytes{job="metrics-service"} / 1024 / 1024) > 512 + # for: 5m + # labels: + # severity: warning + # service: metrics + # annotations: + # summary: "High memory usage in metrics service (instance {{ $labels.instance }})" + # description: "Memory usage is {{ $value }} MB." + + # - alert: MetricsServiceHighCPUUsage + # expr: rate(process_cpu_seconds_total{job="metrics-service"}[5m]) * 100 > 80 + # for: 5m + # labels: + # severity: warning + # service: metrics + # annotations: + # summary: "High CPU usage in metrics service (instance {{ $labels.instance }})" + # description: "CPU usage is {{ $value }}%." + + # - alert: MetricsServiceNoData + # expr: absent(up{job="metrics-service"}) + # for: 5m + # labels: + # severity: critical + # service: metrics + # annotations: + # summary: "No data from metrics service (instance {{ $labels.instance }})" + # description: "No metrics data received from metrics service for more than 5 minutes." 
diff --git a/freeleaps/prod/ci/freeleaps-service-hub/Jenkinsfile b/freeleaps/prod/ci/freeleaps-service-hub/Jenkinsfile index a5f8ec5c..c1faec17 100644 --- a/freeleaps/prod/ci/freeleaps-service-hub/Jenkinsfile +++ b/freeleaps/prod/ci/freeleaps-service-hub/Jenkinsfile @@ -10,6 +10,27 @@ executeFreeleapsPipeline { executeMode = 'fully' commitMessageLintEnabled = false components = [ + [ + name: 'metrics', + root: 'apps/metrics', + language: 'python', + dependenciesManager: 'pip', + requirementsFile: 'requirements.txt', + buildCacheEnabled: true, + buildAgentImage: 'python:3.12-slim', + buildArtifacts: ['.'], + lintEnabled: false, + sastEnabled: false, + imageRegistry: 'docker.io', + imageRepository: 'freeleaps', + imageName: 'devops', + imageBuilder: 'dind', + dockerfilePath: 'Dockerfile', + imageBuildRoot: '.', + imageReleaseArchitectures: ['linux/amd64', 'linux/arm64/v8'], + registryCredentialsId: 'freeleaps-devops-docker-hub-credentials', + semanticReleaseEnabled: true + ], [ name: 'authentication', root: 'apps/authentication', @@ -135,27 +156,6 @@ executeFreeleapsPipeline { imageReleaseArchitectures: ['linux/amd64', 'linux/arm64/v8'], registryCredentialsId: 'freeleaps-devops-docker-hub-credentials', semanticReleaseEnabled: true - ], - [ - name: 'metrics', - root: 'apps/metrcis', - language: 'python', - dependenciesManager: 'pip', - requirementsFile: 'requirements.txt', - buildCacheEnabled: true, - buildAgentImage: 'python:3.12-slim', - buildArtifacts: ['.'], - lintEnabled: false, - sastEnabled: false, - imageRegistry: 'docker.io', - imageRepository: 'freeleaps', - imageName: 'devops', - imageBuilder: 'dind', - dockerfilePath: 'Dockerfile', - imageBuildRoot: '.', - imageReleaseArchitectures: ['linux/amd64', 'linux/arm64/v8'], - registryCredentialsId: 'freeleaps-devops-docker-hub-credentials', - semanticReleaseEnabled: true ] ] } \ No newline at end of file From 2ecfc54f7b9e9dfddc8cc271d915e5d38d9116b9 Mon Sep 17 00:00:00 2001 From: freeleaps-gitops-bot Date: Tue, 
23 Sep 2025 06:46:27 +0000 Subject: [PATCH 5/8] ci(bump): bump metrics image version for alpha to snapshot-3cba9e4 --- freeleaps/helm-pkg/metrics/values.alpha.yaml | 44 ++++++++++---------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/freeleaps/helm-pkg/metrics/values.alpha.yaml b/freeleaps/helm-pkg/metrics/values.alpha.yaml index 826963e2..823193c9 100644 --- a/freeleaps/helm-pkg/metrics/values.alpha.yaml +++ b/freeleaps/helm-pkg/metrics/values.alpha.yaml @@ -84,27 +84,25 @@ metrics: prometheusRule: name: freepeals-alpha-metrics enabled: false - namespace: "freeleaps-monitoring-system" + namespace: freeleaps-monitoring-system rules: - - alert: FreeleapsMetricsServiceDown - expr: up{job="metrics-service"} == 0 - for: 1m - labels: - severity: critical - service: metrics-service - annotations: - summary: "Freeleaps Metrics service is down (instance {{ $labels.instance }})" - description: "Freeleaps Metrics service has been down for more than 1 minutes." - runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7" - - - alert: FreeleapsMetricsServiceHighErrorRate - expr: rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1 - for: 5m - labels: - severity: warning - service: metrics-service - annotations: - summary: "High error rate in freeleaps metrics service (instance {{ $labels.instance }})" - description: "Freeleaps Metrics service error rate is {{ $value }} errors per second." - runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7" - + - alert: FreeleapsMetricsServiceDown + expr: up{job="metrics-service"} == 0 + for: 1m + labels: + severity: critical + service: metrics-service + annotations: + summary: Freeleaps Metrics service is down (instance {{ $labels.instance }}) + description: Freeleaps Metrics service has been down for more than 1 minutes. 
+ runbook_url: https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7 + - alert: FreeleapsMetricsServiceHighErrorRate + expr: rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1 + for: 5m + labels: + severity: warning + service: metrics-service + annotations: + summary: High error rate in freeleaps metrics service (instance {{ $labels.instance }}) + description: Freeleaps Metrics service error rate is {{ $value }} errors per second. + runbook_url: https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7 From caa385348f51a0cc83f743045ee381bbd6bac00a Mon Sep 17 00:00:00 2001 From: freeleaps-gitops-bot Date: Tue, 23 Sep 2025 07:10:30 +0000 Subject: [PATCH 6/8] ci(bump): bump content image version for alpha to snapshot-3cba9e4 --- freeleaps/helm-pkg/content/values.alpha.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freeleaps/helm-pkg/content/values.alpha.yaml b/freeleaps/helm-pkg/content/values.alpha.yaml index ffe7bf14..ae415c81 100644 --- a/freeleaps/helm-pkg/content/values.alpha.yaml +++ b/freeleaps/helm-pkg/content/values.alpha.yaml @@ -15,7 +15,7 @@ content: registry: docker.io repository: null name: content - tag: snapshot-96f2f52 + tag: snapshot-3cba9e4 imagePullPolicy: IfNotPresent ports: - name: http From 3613428b3bc54619992bec6eee6fbaf0ab02228a Mon Sep 17 00:00:00 2001 From: freeleaps-gitops-bot Date: Tue, 23 Sep 2025 07:17:03 +0000 Subject: [PATCH 7/8] ci(bump): bump notification image version for alpha to snapshot-3cba9e4 --- freeleaps/helm-pkg/notification/values.alpha.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freeleaps/helm-pkg/notification/values.alpha.yaml b/freeleaps/helm-pkg/notification/values.alpha.yaml index 63d39d16..291ada4e 100644 --- a/freeleaps/helm-pkg/notification/values.alpha.yaml +++ 
b/freeleaps/helm-pkg/notification/values.alpha.yaml @@ -15,7 +15,7 @@ notification: registry: docker.io repository: null name: notification - tag: snapshot-96f2f52 + tag: snapshot-3cba9e4 imagePullPolicy: IfNotPresent ports: - name: http From 2be57cb278c8be249754b43c44d1d10139dc2c4e Mon Sep 17 00:00:00 2001 From: freeleaps-gitops-bot Date: Tue, 23 Sep 2025 07:22:30 +0000 Subject: [PATCH 8/8] ci(bump): bump payment image version for alpha to snapshot-3cba9e4 --- freeleaps/helm-pkg/payment/values.alpha.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freeleaps/helm-pkg/payment/values.alpha.yaml b/freeleaps/helm-pkg/payment/values.alpha.yaml index bd027110..419bfda7 100644 --- a/freeleaps/helm-pkg/payment/values.alpha.yaml +++ b/freeleaps/helm-pkg/payment/values.alpha.yaml @@ -15,7 +15,7 @@ payment: registry: docker.io repository: null name: payment - tag: snapshot-96f2f52 + tag: snapshot-3cba9e4 imagePullPolicy: IfNotPresent ports: - name: http