Merge pull request 'feat: authentication Prometheus set-up' (#123) from feat-alert-manager into master

Reviewed-on: https://gitea.freeleaps.mathmast.com/freeleaps/freeleaps-ops/pulls/123
This commit is contained in:
Easonzyc 2025-10-15 02:44:58 +00:00
commit 07307ab06c
2 changed files with 65 additions and 0 deletions

View File

@ -0,0 +1,37 @@
{{- /*
Copyright Broadcom, Inc. All Rights Reserved.
SPDX-License-Identifier: APACHE-2.0
*/}}
{{- /*
Renders a PrometheusRule for the authentication service.

The manifest is emitted only when .Values.authentication.prometheusRule.enabled
is truthy. Every entry of .Values.authentication.prometheusRule.rules becomes
one alerting rule inside a single rule group; that group reuses the name of the
PrometheusRule resource itself.

Per-rule keys read here: alert, expr, for (optional), labels (optional),
annotations (optional).
*/}}
{{- if .Values.authentication.prometheusRule.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ .Values.authentication.prometheusRule.name | quote }}
  namespace: {{ .Values.authentication.prometheusRule.namespace | quote }}
  {{- with .Values.authentication.prometheusRule.labels }}
  labels:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  groups:
    {{- /* `with` rebinds `.` to the rules list, hence `$` to reach .Values below. */}}
    {{- with .Values.authentication.prometheusRule.rules }}
    - name: {{ $.Values.authentication.prometheusRule.name | quote }}
      rules:
        {{- range . }}
        - alert: {{ .alert | quote }}
          expr: {{ .expr | quote }}
          {{- if .for }}
          for: {{ .for | quote }}
          {{- end }}
          {{- if .labels }}
          labels:
            {{- toYaml .labels | nindent 12 }}
          {{- end }}
          {{- if .annotations }}
          annotations:
            {{- toYaml .annotations | nindent 12 }}
          {{- end }}
        {{- end }}
    {{- end }}
{{- end }}

View File

@ -115,3 +115,31 @@ authentication:
controlledResources: controlledResources:
- cpu - cpu
- memory - memory
# Settings consumed by the authentication PrometheusRule template.
prometheusRule:
  # Used both as the PrometheusRule resource name and as the name of its
  # single rule group.
  name: freeleaps-alpha-authentication
  # The PrometheusRule manifest is rendered only when this is true.
  enabled: false
  # Namespace written to the PrometheusRule's metadata.namespace.
  namespace: freeleaps-monitoring-system
  # Copied verbatim into metadata.labels.
  # NOTE(review): presumably `release` has to match the rule selector of the
  # Prometheus instance that should load these rules - confirm.
  labels:
    release: kube-prometheus-stack
  # Alerting rules. Each entry supports: alert, expr, for (optional),
  # labels (optional), annotations (optional).
  # The `{{ ... }}` placeholders inside annotations are not Helm expressions;
  # they are written to the manifest as-is via toYaml. They are single-quoted
  # so YAML always reads them as plain strings.
  rules:
    - alert: FreeleapsAuthenticationServiceDown
      expr: 'up{job="authentication-service"} == 0'
      for: 1m
      labels:
        severity: critical
        service: authentication-service
      annotations:
        summary: 'Freeleaps Authentication service is down (instance {{ $labels.instance }})'
        description: Freeleaps Authentication service has been down for more than 1 minute.
        runbook_url: https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7
    - alert: FreeleapsAuthenticationServiceHighErrorRate
      # NOTE(review): rate() is evaluated per series, so the 0.1/s threshold
      # applies to each matching 5xx series separately rather than to the
      # service total - confirm this is intended.
      expr: 'rate(http_requests_total{job="authentication-service",status=~"5.."}[5m]) > 0.1'
      for: 5m
      labels:
        severity: warning
        service: authentication-service
      annotations:
        summary: 'High error rate in freeleaps authentication service (instance {{ $labels.instance }})'
        description: 'Freeleaps Authentication service error rate is {{ $value }} errors per second.'
        runbook_url: https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7