Merge branch 'master' of https://gitea.freeleaps.mathmast.com/freeleaps/freeleaps-ops
This commit is contained in: 3f09d87b5a

docs/Azure_K8s_Node_Addition_Runbook.md (new file, 283 lines)
@@ -0,0 +1,283 @@

# Azure Kubernetes Node Addition Runbook

## Overview

This runbook provides step-by-step instructions for adding new Azure Virtual Machines to an existing Kubernetes cluster installed via Kubespray.

## Prerequisites

- Access to Azure CLI with appropriate permissions
- SSH access to the new VM
- Access to the existing Kubernetes cluster
- Kubespray installation directory

## Pre-Installation Checklist

### 1. Verify New VM Details

```bash
# Get VM details from Azure (-d/--show-details is required to include the IP fields)
az vm show -d --resource-group <RESOURCE_GROUP> --name <VM_NAME> --query "{name:name,ip:publicIps,privateIp:privateIps}" -o table
```

### 2. Verify SSH Access

```bash
# Test SSH connection to the new VM
ssh wwwadmin@mathmast.com@<VM_PRIVATE_IP>
# You will be prompted for a password
```

### 3. Verify Network Connectivity

```bash
# From the new VM, test connectivity to the existing cluster
ping <EXISTING_MASTER_IP>
```

## Step-by-Step Process

### Step 1: Update Ansible Inventory

1. **Navigate to the Kubespray directory**
   ```bash
   cd freeleaps-ops/3rd/kubespray
   ```

2. **Edit the inventory file**
   ```bash
   vim ../cluster/ansible/manifests/inventory.ini
   ```

3. **Add the new node to the appropriate group**

   For a worker node:
   ```ini
   [kube_node]
   # Existing nodes...
   prod-usw2-k8s-freeleaps-worker-nodes-06 ansible_host=<NEW_VM_PRIVATE_IP> ansible_user=wwwadmin@mathmast.com host_name=prod-usw2-k8s-freeleaps-worker-nodes-06
   ```

   For a master node:
   ```ini
   [kube_control_plane]
   # Existing nodes...
   prod-usw2-k8s-freeleaps-master-03 ansible_host=<NEW_VM_PRIVATE_IP> ansible_user=wwwadmin@mathmast.com etcd_member_name=freeleaps-etcd-03 host_name=prod-usw2-k8s-freeleaps-master-03
   ```

### Step 2: Verify Inventory Configuration

1. **Check inventory syntax**
   ```bash
   ansible-inventory -i ../cluster/ansible/manifests/inventory.ini --list
   ```

2. **Test connectivity to the new node**
   ```bash
   ansible -i ../cluster/ansible/manifests/inventory.ini kube_node -m ping -kK
   ```

### Step 3: Run Kubespray Scale Playbook

1. **Execute the scale playbook**
   ```bash
   cd ../cluster/ansible/manifests
   ansible-playbook -i inventory.ini ../../3rd/kubespray/scale.yml -kK -b
   ```

**Note**:
- `-k` prompts for the SSH password
- `-K` prompts for the sudo password
- `-b` enables privilege escalation
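
If the cluster already has several nodes, `scale.yml` can take a long time because it touches every host. Kubespray supports limiting the run to just the node being added; a minimal sketch, assuming the new worker is the `prod-usw2-k8s-freeleaps-worker-nodes-06` entry from the inventory example above:

```bash
# Run the scale playbook only against the newly added node
ansible-playbook -i inventory.ini ../../3rd/kubespray/scale.yml \
  --limit=prod-usw2-k8s-freeleaps-worker-nodes-06 -kK -b
```
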
### Step 4: Verify Node Addition

1. **Check node status**
   ```bash
   kubectl get nodes
   ```

2. **Verify node is ready**
   ```bash
   kubectl describe node <NEW_NODE_NAME>
   ```

3. **Check node labels**
   ```bash
   kubectl get nodes --show-labels
   ```
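
If the new worker should carry the same role label as the existing workers, you can add it manually. This is a hedged example — the label key and value are assumptions, so copy whatever `kubectl get nodes --show-labels` reports for the current workers:

```bash
# Label the new node with the conventional worker role (adjust to match existing nodes)
kubectl label node <NEW_NODE_NAME> node-role.kubernetes.io/worker=worker
```
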
### Step 5: Post-Installation Verification

1. **Test pod scheduling**
   ```bash
   # Create a test pod to verify scheduling
   kubectl run test-pod --image=nginx --restart=Never
   kubectl get pod test-pod -o wide
   ```

2. **Check node resources**
   ```bash
   kubectl top nodes
   ```

3. **Verify node components**
   ```bash
   kubectl get pods -n kube-system -o wide | grep <NEW_NODE_NAME>
   ```

## Troubleshooting

### Common Issues

#### 1. SSH Connection Failed
```bash
# Verify VM is running (-d is required to include powerState)
az vm show -d --resource-group <RESOURCE_GROUP> --name <VM_NAME> --query "powerState"

# Check network security groups
az network nsg rule list --resource-group <RESOURCE_GROUP> --nsg-name <NSG_NAME>
```

#### 2. Ansible Connection Failed
```bash
# Test with verbose output
ansible -i ../cluster/ansible/manifests/inventory.ini kube_node -m ping -kK -vvv
```

#### 3. Node Not Ready
```bash
# Check node conditions
kubectl describe node <NEW_NODE_NAME>

# Check kubelet logs (kubelet runs as a systemd service on the node, not as a pod)
ssh wwwadmin@mathmast.com@<VM_PRIVATE_IP>
sudo journalctl -u kubelet --no-pager --since "10 minutes ago"
```

#### 4. Pod Scheduling Issues
```bash
# Check node taints
kubectl get nodes -o custom-columns=NAME:.metadata.name,TAINTS:.spec.taints

# Check node capacity
kubectl describe node <NEW_NODE_NAME> | grep -A 10 "Capacity"
```

### Recovery Procedures

#### If Scale Playbook Fails
1. **Clean up the failed node**
   ```bash
   kubectl delete node <NEW_NODE_NAME>
   ```

2. **Restart the VM**
   ```bash
   # Restart the VM (note: this reboots the machine; it does not re-image it to a clean state)
   az vm restart --resource-group <RESOURCE_GROUP> --name <VM_NAME>
   ```

3. **Retry the scale playbook**
   ```bash
   ansible-playbook -i inventory.ini ../../3rd/kubespray/scale.yml -kK -b
   ```

#### If Node is Stuck in NotReady State
1. **Check kubelet service**
   ```bash
   ssh wwwadmin@mathmast.com@<VM_PRIVATE_IP>
   sudo systemctl status kubelet
   ```

2. **Restart kubelet**
   ```bash
   ssh wwwadmin@mathmast.com@<VM_PRIVATE_IP>
   sudo systemctl restart kubelet
   ```

## Security Considerations

### 1. Network Security
- Ensure the new VM is in the correct subnet
- Verify network security group rules allow cluster communication
- Check firewall rules if applicable

### 2. Access Control
- Use SSH key-based authentication when possible (see the sketch below)
- Limit sudo access to necessary commands
- Monitor node access logs
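
A minimal sketch of moving the `wwwadmin@mathmast.com` account on a node to key-based authentication; the key path and comment below are placeholders, so align them with however SSH keys are normally distributed in your environment:

```bash
# Generate a dedicated key pair locally (skip if one already exists)
ssh-keygen -t ed25519 -f ~/.ssh/freeleaps_k8s -C "k8s-node-admin"

# Install the public key on the node (prompts for the password one last time)
ssh-copy-id -i ~/.ssh/freeleaps_k8s.pub wwwadmin@mathmast.com@<VM_PRIVATE_IP>

# Confirm key-based login works before tightening password authentication
ssh -i ~/.ssh/freeleaps_k8s wwwadmin@mathmast.com@<VM_PRIVATE_IP> 'hostname'
```
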
### 3. Compliance
- Ensure the new node meets security requirements
- Verify all required security patches are applied
- Check compliance with organizational policies

## Monitoring and Maintenance

### 1. Node Health Monitoring
```bash
# Check health of the new node
kubectl get nodes -o wide
kubectl top nodes
```

### 2. Resource Monitoring
```bash
# Monitor resource usage
kubectl describe node <NEW_NODE_NAME> | grep -A 5 "Allocated resources"
```

### 3. Log Monitoring
```bash
# Follow kubelet logs on the node (kubelet is a systemd service, not a pod)
ssh wwwadmin@mathmast.com@<VM_PRIVATE_IP>
sudo journalctl -u kubelet -n 100 -f
```

## Rollback Procedures

### If Node Addition Causes Issues

1. **Cordon the node**
   ```bash
   kubectl cordon <NEW_NODE_NAME>
   ```

2. **Drain the node**
   ```bash
   kubectl drain <NEW_NODE_NAME> --ignore-daemonsets --delete-emptydir-data
   ```

3. **Remove the node**
   ```bash
   kubectl delete node <NEW_NODE_NAME>
   ```

4. **Update inventory**
   ```bash
   # Remove the node from inventory.ini
   vim ../cluster/ansible/manifests/inventory.ini
   ```
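
Kubespray also ships a `remove-node.yml` playbook that uninstalls kubelet and the other cluster components from the host in addition to removing it from the API server. A hedged sketch — run it before editing the inventory, and keep the flags consistent with how `scale.yml` is invoked above:

```bash
# Cleanly remove the node with Kubespray, then delete it from inventory.ini
ansible-playbook -i inventory.ini ../../3rd/kubespray/remove-node.yml \
  -e node=<NEW_NODE_NAME> -kK -b
```
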
## Documentation

### Required Information
- VM name and IP address
- Resource group and subscription
- Node role (worker/master)
- Date and time of addition
- Person performing the addition

### Post-Addition Checklist
- [ ] Node appears in `kubectl get nodes`
- [ ] Node status is Ready
- [ ] Pods can be scheduled on the node
- [ ] All node components are running
- [ ] Monitoring is configured
- [ ] Documentation is updated

## Emergency Contacts

- **Infrastructure Team**: [Contact Information]
- **Kubernetes Administrators**: [Contact Information]
- **Azure Support**: [Contact Information]

---

**Last Updated**: [Date]
**Version**: 1.0
**Author**: [Name]

docs/Current_Ingress_Analysis.md (new file, 409 lines)
@@ -0,0 +1,409 @@

# Current Ingress Setup Analysis

## 🎯 **Overview**

This document analyzes your current Kubernetes ingress setup based on the codebase examination. It explains how your ingress infrastructure works, what components are involved, and how they interact.

---

## 📊 **Your Current Ingress Architecture**

```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ INTERNET │
|
||||
│ │
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ Browser │ │ Mobile │ │ API │ │ Other │ │
|
||||
│ │ │ │ App │ │ Client │ │ Clients │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
│ │ │ │ │ │
|
||||
│ └────────────────┼───────────────┼───────────────┘ │
|
||||
│ │ │ │
|
||||
│ ▼ ▼ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ AZURE LOAD BALANCER │ │
|
||||
│ │ IP: 4.155.160.32 (prod-usw2-k8s-freeleaps-lb-fe-ip) │ │
|
||||
│ │ Port: 80/443 │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ NGINX INGRESS CONTROLLER │ │
|
||||
│ │ Namespace: freeleaps-controls-system │ │
|
||||
│ │ ┌─────────────────────────────────────────────────────────────┐ │ │
|
||||
│ │ │ Pod: ingress-nginx-controller-abc123 │ │ │
|
||||
│ │ │ Image: ingress-nginx/controller:v1.12.0 │ │ │
|
||||
│ │ │ IP: 10.0.1.100 Port: 80/443 │ │ │
|
||||
│ │ └─────────────────────────────────────────────────────────────┘ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ INGRESS RULES │ │
|
||||
│ │ │ │
|
||||
│ │ argo.mathmast.com → argo-cd-server:80 │ │
|
||||
│ │ gitea.freeleaps.mathmast.com → gitea-http:3000 │ │
|
||||
│ │ magicleaps.mathmast.com → magicleaps-frontend-service:80 │ │
|
||||
│ │ alpha.magicleaps.mathmast.com → magicleaps-frontend-service:80 │ │
|
||||
│ │ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ KUBERNETES SERVICES │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||
│ │ │argo-cd-svc │ │gitea-http │ │magic-front │ │magic-api │ │ │
|
||||
│ │ │ClusterIP │ │ClusterIP │ │ClusterIP │ │ClusterIP │ │ │
|
||||
│ │ │10.0.1.10 │ │10.0.1.11 │ │10.0.1.12 │ │10.0.1.13 │ │ │
|
||||
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ APPLICATION PODS │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||
│ │ │argo-cd-pod │ │gitea-pod │ │magic-front │ │magic-api │ │ │
|
||||
│ │ │10.0.1.101 │ │10.0.1.102 │ │10.0.1.103 │ │10.0.1.104 │ │ │
|
||||
│ │ │argo-cd:v2.8 │ │gitea:1.20 │ │nginx:latest │ │api:v1.2 │ │ │
|
||||
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 **Components Analysis**
|
||||
|
||||
### **1. Nginx Ingress Controller**
|
||||
|
||||
**Location**: `freeleaps-ops/cluster/manifests/freeleaps-controls-system/ingress-nginx/values.yaml`
|
||||
|
||||
**Key Configuration**:
|
||||
```yaml
|
||||
# Controller Configuration
|
||||
controller:
|
||||
name: controller
|
||||
image:
|
||||
image: ingress-nginx/controller
|
||||
tag: "v1.12.0" # ← Specific version for stability
|
||||
runAsNonRoot: true # ← Security: don't run as root
|
||||
runAsUser: 101 # ← Security: run as nginx user
|
||||
allowPrivilegeEscalation: false # ← Security: prevent privilege escalation
|
||||
|
||||
# Ingress Class Configuration
|
||||
ingressClassResource:
|
||||
name: nginx # ← Ingress class name
|
||||
enabled: true # ← Create the IngressClass resource
|
||||
default: false # ← Not the default (allows multiple controllers)
|
||||
controllerValue: k8s.io/ingress-nginx # ← Controller identifier
|
||||
|
||||
# Service Configuration
|
||||
service:
|
||||
type: LoadBalancer # ← Azure Load Balancer for external access
|
||||
ports:
|
||||
http: 80 # ← HTTP port
|
||||
https: 443 # ← HTTPS port
|
||||
```
|
||||
|
||||
**What this means**:
|
||||
- You have a production-grade nginx-ingress-controller
|
||||
- It's configured with security best practices
|
||||
- It uses Azure Load Balancer for external access
|
||||
- It's not the default ingress class (allows flexibility)
|
||||
|
||||
### **2. Cert-Manager Integration**
|
||||
|
||||
**Location**: `freeleaps-ops/cluster/manifests/freeleaps-controls-system/godaddy-webhook/cluster-issuer.yaml`
|
||||
|
||||
**Key Configuration**:
|
||||
```yaml
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: mathmast-dot-com
|
||||
spec:
|
||||
acme:
|
||||
email: acme@mathmast.com
|
||||
server: https://acme-v02.api.letsencrypt.org/directory
|
||||
solvers:
|
||||
- dns01:
|
||||
webhook:
|
||||
config:
|
||||
apiKeySecretRef:
|
||||
name: mathmast-godaddy-api-key
|
||||
groupName: acme.mathmast.com
|
||||
solverName: godaddy
|
||||
selector:
|
||||
dnsZones:
|
||||
- mathmast.com
|
||||
```
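
To confirm this issuer is actually completing its DNS01 challenges, you can inspect cert-manager's ACME resources directly (these are standard cert-manager CRDs; the challenge name below is a placeholder):

```bash
# Watch the ACME machinery behind a certificate request
kubectl get certificaterequests,orders,challenges --all-namespaces

# Drill into a specific challenge if issuance appears stuck
kubectl describe challenge <challenge-name> -n <namespace>
```
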
|
||||
|
||||
**What this means**:
|
||||
- You're using Let's Encrypt for SSL certificates
|
||||
- DNS01 challenge for domain validation (more reliable than HTTP01)
|
||||
- GoDaddy DNS API integration for automatic DNS record creation
|
||||
- Certificates are automatically renewed
|
||||
|
||||
### **3. Custom Ingress Manager**
|
||||
|
||||
**Location**: `freeleaps-devops-reconciler/reconciler/controllers/ingress_resources/ingress_manager.py`
|
||||
|
||||
**Key Features**:
|
||||
```python
|
||||
# Automatic Ingress Creation
|
||||
annotations = {
|
||||
"nginx.ingress.kubernetes.io/ssl-redirect": "true",
|
||||
"nginx.ingress.kubernetes.io/force-ssl-redirect": "true",
|
||||
"cert-manager.io/cluster-issuer": "letsencrypt-prod",
|
||||
"nginx.ingress.kubernetes.io/proxy-body-size": "0",
|
||||
"nginx.ingress.kubernetes.io/proxy-read-timeout": "600",
|
||||
"nginx.ingress.kubernetes.io/proxy-send-timeout": "600"
|
||||
}
|
||||
```
|
||||
|
||||
**What this means**:
|
||||
- You have a custom controller that automatically creates ingresses
|
||||
- It enforces SSL redirect (HTTP → HTTPS)
|
||||
- It integrates with cert-manager for automatic certificates
|
||||
- It sets performance optimizations (timeouts, body size)
|
||||
|
||||
---
|
||||
|
||||
## 🔄 **Request Flow Analysis**
|
||||
|
||||
### **1. External Request Flow**
|
||||
```
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ Browser │ │ Azure Load │ │ Nginx │ │ Application │
|
||||
│ │ │ Balancer │ │ Ingress │ │ Service │
|
||||
└─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘
|
||||
│ │ │ │
|
||||
│ HTTPS Request │ │ │
|
||||
│───────────────▶│ │ │
|
||||
│ │ Forward to │ │
|
||||
│ │ nginx │ │
|
||||
│ │───────────────▶│ │
|
||||
│ │ │ Route based │
|
||||
│ │ │ on host/path │
|
||||
│ │ │───────────────▶│
|
||||
│ │ │ │ Return response
|
||||
│ │ │◀───────────────│
|
||||
│ │◀───────────────│ │
|
||||
│◀───────────────│ │ │
|
||||
```
|
||||
|
||||
### **2. SSL Certificate Flow**
|
||||
```
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ Ingress │ │ cert-manager │ │ Let's │ │ GoDaddy │
|
||||
│ Controller │ │ │ │ Encrypt │ │ DNS API │
|
||||
└─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘
|
||||
│ │ │ │
|
||||
│ Check cert │ │ │
|
||||
│───────────────▶│ │ │
|
||||
│ │ Request cert │ │
|
||||
│ │───────────────▶│ │
|
||||
│ │ │ DNS Challenge │
|
||||
│ │ │───────────────▶│
|
||||
│ │ │ │ Create TXT record
|
||||
│ │ │ │◀───────────────│
|
||||
│ │ │ Cert Ready │
|
||||
│ │ │◀───────────────│
|
||||
│ │ Cert Ready │ │
|
||||
│ │◀───────────────│ │
|
||||
│ Cert Ready │ │ │
|
||||
│◀───────────────│ │ │
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🛠️ **Current Applications**
|
||||
|
||||
Based on your codebase, you have these applications exposed via ingress:
|
||||
|
||||
### **1. ArgoCD (GitOps)**
|
||||
- **Domain**: `argo.mathmast.com`
|
||||
- **Service**: `argo-cd-server`
|
||||
- **Purpose**: GitOps deployment tool
|
||||
- **Access**: Web UI for managing deployments
|
||||
- **Namespace**: `freeleaps-devops-system`
|
||||
|
||||
### **2. Gitea (Git Repository)**
|
||||
- **Domain**: `gitea.freeleaps.mathmast.com`
|
||||
- **Service**: `gitea-http`
|
||||
- **Purpose**: Git repository hosting
|
||||
- **Access**: Web UI for code management
|
||||
- **Namespace**: `freeleaps-prod`
|
||||
- **Port**: 3000
|
||||
|
||||
### **3. Magicleaps (Main Application)**
|
||||
- **Production Domain**: `magicleaps.mathmast.com`
|
||||
- **Alpha Domain**: `alpha.magicleaps.mathmast.com`
|
||||
- **Service**: `magicleaps-frontend-service`
|
||||
- **Purpose**: Main business application
|
||||
- **Namespace**: `magicleaps`
|
||||
- **Port**: 80
|
||||
|
||||
---
|
||||
|
||||
## 🔒 **Security Features**
|
||||
|
||||
### **1. SSL/TLS Enforcement**
|
||||
```yaml
|
||||
# All traffic is forced to HTTPS
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
```
|
||||
|
||||
### **2. Automatic Certificate Management**
|
||||
- Let's Encrypt certificates
|
||||
- DNS01 challenge validation
|
||||
- Automatic renewal
|
||||
- GoDaddy DNS integration
|
||||
|
||||
### **3. Performance Optimizations**
|
||||
```yaml
|
||||
# Handle large requests
|
||||
nginx.ingress.kubernetes.io/proxy-body-size: "0"
|
||||
|
||||
# Long-running requests
|
||||
nginx.ingress.kubernetes.io/proxy-read-timeout: "600"
|
||||
nginx.ingress.kubernetes.io/proxy-send-timeout: "600"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📊 **Monitoring and Debugging**
|
||||
|
||||
### **1. Check Ingress Status**
|
||||
```bash
|
||||
# Check all ingresses
|
||||
kubectl get ingress --all-namespaces
|
||||
|
||||
# Check specific ingress
|
||||
kubectl describe ingress <ingress-name> -n <namespace>
|
||||
|
||||
# Check ingress controller
|
||||
kubectl get pods -n freeleaps-controls-system -l app.kubernetes.io/name=ingress-nginx
|
||||
```
|
||||
|
||||
### **2. Check SSL Certificates**
|
||||
```bash
|
||||
# Check certificates
|
||||
kubectl get certificates --all-namespaces
|
||||
|
||||
# Check certificate status
|
||||
kubectl describe certificate <cert-name> -n <namespace>
|
||||
|
||||
# Check cert-manager
|
||||
kubectl get pods -n cert-manager
|
||||
```
|
||||
|
||||
### **3. Check DNS Resolution**
|
||||
```bash
|
||||
# Test DNS resolution
|
||||
nslookup argo.mathmast.com
|
||||
nslookup gitea.freeleaps.mathmast.com
|
||||
nslookup magicleaps.mathmast.com
|
||||
nslookup alpha.magicleaps.mathmast.com
|
||||
```
|
||||
|
||||
### **4. Check Azure Load Balancer**
|
||||
```bash
|
||||
# Your actual load balancer IP
|
||||
curl -I http://4.155.160.32
|
||||
|
||||
# Check if load balancer is responding
|
||||
telnet 4.155.160.32 80
|
||||
telnet 4.155.160.32 443
|
||||
```
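
Because every hostname resolves to the same load balancer IP, you can also verify which certificate the ingress serves for a given host by forcing SNI against 4.155.160.32; a small sketch:

```bash
# Check the certificate served for a specific host via the load balancer IP
echo | openssl s_client -connect 4.155.160.32:443 -servername argo.mathmast.com 2>/dev/null \
  | openssl x509 -noout -subject -issuer -dates
```
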
|
||||
|
||||
---
|
||||
|
||||
## 🚀 **How Your Setup Compares to Examples**
|
||||
|
||||
### **Your Current Setup vs Example**
|
||||
|
||||
| Feature | Your Setup | Example Setup | Notes |
|
||||
|---------|------------|---------------|-------|
|
||||
| **Ingress Controller** | nginx-ingress v1.12.0 | nginx-ingress | Same |
|
||||
| **SSL Provider** | Let's Encrypt + GoDaddy | Let's Encrypt | You have DNS integration |
|
||||
| **Certificate Validation** | DNS01 challenge | HTTP01 challenge | More reliable |
|
||||
| **Automatic Creation** | Custom controller | Manual | You have automation |
|
||||
| **Performance** | Optimized timeouts | Basic | You have better config |
|
||||
| **Security** | SSL redirect enforced | SSL redirect | Same |
|
||||
|
||||
### **Advantages of Your Setup**
|
||||
|
||||
1. **Automation**: Custom controller automatically creates ingresses
|
||||
2. **DNS Integration**: GoDaddy API for automatic DNS record creation
|
||||
3. **Reliability**: DNS01 challenge is more reliable than HTTP01
|
||||
4. **Performance**: Optimized timeouts and body size limits
|
||||
5. **Security**: Enforced SSL redirects
|
||||
|
||||
---
|
||||
|
||||
## 🔧 **Troubleshooting Your Setup**
|
||||
|
||||
### **1. Certificate Issues**
|
||||
```bash
|
||||
# Check certificate status
|
||||
kubectl get certificates --all-namespaces
|
||||
|
||||
# Check cert-manager logs
|
||||
kubectl logs -n cert-manager deployment/cert-manager
|
||||
|
||||
# Check DNS records
|
||||
dig TXT _acme-challenge.mathmast.com
|
||||
```
|
||||
|
||||
### **2. Ingress Issues**
|
||||
```bash
|
||||
# Check ingress controller
|
||||
kubectl get pods -n freeleaps-controls-system -l app.kubernetes.io/name=ingress-nginx
|
||||
|
||||
# Check ingress controller logs
|
||||
kubectl logs -n freeleaps-controls-system deployment/ingress-nginx-controller
|
||||
|
||||
# Check ingress status
|
||||
kubectl describe ingress <ingress-name> -n <namespace>
|
||||
```
|
||||
|
||||
### **3. DNS Issues**
|
||||
```bash
|
||||
# Test DNS resolution
|
||||
nslookup <your-domain>
|
||||
|
||||
# Check GoDaddy API key
|
||||
kubectl get secret mathmast-godaddy-api-key -n cert-manager -o yaml
|
||||
```
|
||||
|
||||
### **4. Load Balancer Issues**
|
||||
```bash
|
||||
# Check if your load balancer is accessible
|
||||
curl -I http://4.155.160.32
|
||||
|
||||
# Check Azure load balancer health
|
||||
az network lb show --name prod-usw2-k8s-freeleaps-lb --resource-group <resource-group>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📚 **Learn More**
|
||||
|
||||
### **Your Specific Components**
|
||||
- [nginx-ingress](https://kubernetes.github.io/ingress-nginx/) - Your ingress controller
|
||||
- [cert-manager](https://cert-manager.io/docs/) - Your certificate manager
|
||||
- [GoDaddy DNS01](https://cert-manager.io/docs/configuration/acme/dns01/) - Your DNS provider (GoDaddy configuration)
|
||||
- [Let's Encrypt](https://letsencrypt.org/docs/) - Your certificate authority
|
||||
|
||||
### **Related Documentation**
|
||||
- [Kubernetes Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/)
|
||||
- [SSL/TLS in Kubernetes](https://kubernetes.io/docs/concepts/services-networking/ingress/#tls)
|
||||
- [DNS01 Challenge](https://cert-manager.io/docs/configuration/acme/dns01/)
|
||||
|
||||
---
|
||||
|
||||
**Last Updated**: September 3, 2025
|
||||
**Version**: 1.0
|
||||
**Maintainer**: Infrastructure Team
|
||||
docs/Custom_Resources_And_Operators_Guide.md (new file, 573 lines)
@@ -0,0 +1,573 @@

# Custom Resources & Operators Guide
|
||||
|
||||
## 🎯 **Overview**
|
||||
|
||||
This guide explains **Custom Resources (CRs)**, **Custom Resource Definitions (CRDs)**, **Kubernetes Operators**, and how your `freeleaps-devops-reconciler` works as an operator to manage your DevOps infrastructure.
|
||||
|
||||
---
|
||||
|
||||
## 📊 **What Are Custom Resources?**
|
||||
|
||||
### **🔄 CR vs CRD Relationship**
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ CRD vs CR RELATIONSHIP │
|
||||
│ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ CUSTOM RESOURCE DEFINITION (CRD) │ │ │
|
||||
│ │ │ │ │
|
||||
│ │ apiVersion: apiextensions.k8s.io/v1 │ │ │
|
||||
│ │ kind: CustomResourceDefinition │ │ │
|
||||
│ │ metadata: │ │ │
|
||||
│ │ name: devopsprojects.freeleaps.com │ │ │
|
||||
│ │ spec: │ │ │
|
||||
│ │ group: freeleaps.com │ │ │
|
||||
│ │ names: │ │ │
|
||||
│ │ kind: DevOpsProject │ │ │
|
||||
│ │ plural: devopsprojects │ │ │
|
||||
│ │ scope: Namespaced │ │ │
|
||||
│ │ versions: │ │ │
|
||||
│ │ - name: v1alpha1 │ │ │
|
||||
│ │ schema: │ │ │
|
||||
│ │ # Schema definition... │ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │ │
|
||||
│ │ │ │
|
||||
│ ▼ │ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │ │
|
||||
│ │ CUSTOM RESOURCE (CR) │ │ │
|
||||
│ │ │ │ │
|
||||
│ │ apiVersion: freeleaps.com/v1alpha1 │ │ │
|
||||
│ │ kind: DevOpsProject │ │ │
|
||||
│ │ metadata: │ │ │
|
||||
│ │ name: my-project │ │ │
|
||||
│ │ namespace: freeleaps-devops-system │ │ │
|
||||
│ │ spec: │ │ │
|
||||
│ │ projectName: "My Awesome Project" │ │ │
|
||||
│ │ projectId: "my-awesome-project" │ │ │
|
||||
│ │ git: │ │ │
|
||||
│ │ url: "https://github.com/myorg/myproject" │ │ │
|
||||
│ │ branch: "main" │ │ │
|
||||
│ │ registry: │ │ │
|
||||
│ │ url: "https://harbor.example.com" │ │ │
|
||||
│ │ project: "myproject" │ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### **🎯 Why Custom Resources?**
|
||||
|
||||
```yaml
|
||||
# Instead of managing multiple resources manually:
|
||||
# - Namespace
|
||||
# - ServiceAccount
|
||||
# - Role/RoleBinding
|
||||
# - ConfigMap
|
||||
# - Secret
|
||||
# - Deployment
|
||||
# - Service
|
||||
# - Ingress
|
||||
|
||||
# You can create ONE custom resource:
|
||||
apiVersion: freeleaps.com/v1alpha1
|
||||
kind: DevOpsProject
|
||||
metadata:
|
||||
name: my-project
|
||||
spec:
|
||||
projectName: "My Project"
|
||||
projectId: "my-project"
|
||||
git:
|
||||
url: "https://github.com/myorg/myproject"
|
||||
branch: "main"
|
||||
registry:
|
||||
url: "https://harbor.example.com"
|
||||
project: "myproject"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🏭 **Your DevOps Reconciler Architecture**
|
||||
|
||||
### **📊 Reconciler vs DevOps Repo Relationship**
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ RECONCILER + DEVOPS ARCHITECTURE │
|
||||
│ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ FRELEAPS.COM PLATFORM │ │ │
|
||||
│ │ │ │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||
│ │ │ User │ │ Project │ │ Git │ │ Registry │ │ │
|
||||
│ │ │ Creates │ │ Manager │ │ Webhook │ │ Manager │ │ │
|
||||
│ │ │ Project │ │ Creates │ │ Triggers │ │ Creates │ │ │
|
||||
│ │ │ │ │ DevOps │ │ Event │ │ Repo │ │ │
|
||||
│ │ │ │ │ Project │ │ │ │ │ │ │
|
||||
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │ │
|
||||
│ │ │ │
|
||||
│ ▼ │ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │ │
|
||||
│ │ RABBITMQ MESSAGE QUEUE │ │ │
|
||||
│ │ │ │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||
│ │ │DevOpsInit │ │DevOpsReconcile│ │DevOpsDeploy │ │DevOpsDelete │ │ │
|
||||
│ │ │Event │ │Event │ │Event │ │Event │ │ │
|
||||
│ │ │ │ │ │ │ │ │ │ │ │
|
||||
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │ │
|
||||
│ │ │ │
|
||||
│ ▼ │ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │ │
|
||||
│ │ FRELEAPS-DEVOPS-RECONCILER (OPERATOR) │ │ │
|
||||
│ │ │ │ │
|
||||
│ │ ┌─────────────────────────────────────────────────────────────┐ │ │ │
|
||||
│ │ │ CONTROLLERS │ │ │ │
|
||||
│ │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────┐ │ │ │ │
|
||||
│ │ │ │DevOpsProject│ │ArgoSettings │ │Jenkins │ │... │ │ │ │ │
|
||||
│ │ │ │Controller │ │Controller │ │Settings │ │ │ │ │ │ │
|
||||
│ │ │ │ │ │ │ │Controller │ │ │ │ │ │ │
|
||||
│ │ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────┘ │ │ │ │
|
||||
│ │ └─────────────────────────────────────────────────────────────┘ │ │ │
|
||||
│ │ │ │ │ │
|
||||
│ │ ▼ │ │ │
|
||||
│ │ ┌─────────────────────────────────────────────────────────────┐ │ │ │
|
||||
│ │ │ CUSTOM RESOURCES │ │ │ │
|
||||
│ │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────┐ │ │ │ │
|
||||
│ │ │ │DevOpsProject│ │ArgoSettings │ │Jenkins │ │... │ │ │ │ │
|
||||
│ │ │ │CR │ │CR │ │Settings │ │ │ │ │ │ │
|
||||
│ │ │ │ │ │ │ │CR │ │ │ │ │ │ │
|
||||
│ │ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────┘ │ │ │ │
|
||||
│ │ └─────────────────────────────────────────────────────────────┘ │ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │ │
|
||||
│ │ │ │
|
||||
│ ▼ │ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │ │
|
||||
│ │ KUBERNETES RESOURCES │ │ │
|
||||
│ │ │ │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||
│ │ │ArgoCD │ │Jenkins │ │Harbor │ │Namespaces │ │ │
|
||||
│ │ │Applications │ │Pipelines │ │Repositories │ │Services │ │ │
|
||||
│ │ │Projects │ │Jobs │ │Credentials │ │Deployments │ │ │
|
||||
│ │ │ │ │ │ │ │ │Ingresses │ │ │
|
||||
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 **Your Custom Resources**
|
||||
|
||||
### **1. DevOpsProject CRD**
|
||||
|
||||
```yaml
|
||||
# 🏭 ACTUAL CRD FROM YOUR CODEBASE
|
||||
# freeleaps-devops-reconciler/deploy/crds.yaml
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
name: devopsprojects.freeleaps.com
|
||||
spec:
|
||||
group: freeleaps.com
|
||||
scope: Namespaced
|
||||
names:
|
||||
kind: DevOpsProject
|
||||
plural: devopsprojects
|
||||
shortNames: [dop, dops]
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
served: true
|
||||
storage: true
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
type: object
|
||||
required: ['spec']
|
||||
properties:
|
||||
spec:
|
||||
type: object
|
||||
required:
|
||||
- projectName
|
||||
- projectId
|
||||
- git
|
||||
- registry
|
||||
- environments
|
||||
properties:
|
||||
projectName:
|
||||
type: string
|
||||
description: "Human readable project name"
|
||||
projectId:
|
||||
type: string
|
||||
description: "Unique project identifier"
|
||||
pattern: "^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"
|
||||
git:
|
||||
type: object
|
||||
required: [url, branch]
|
||||
properties:
|
||||
url:
|
||||
type: string
|
||||
description: "Git repository URL"
|
||||
branch:
|
||||
type: string
|
||||
description: "Default git branch"
|
||||
default: "main"
|
||||
registry:
|
||||
type: object
|
||||
required: [url, project]
|
||||
properties:
|
||||
url:
|
||||
type: string
|
||||
description: "Container registry URL"
|
||||
project:
|
||||
type: string
|
||||
description: "Registry project name"
|
||||
```
|
||||
|
||||
### **2. DevOpsProject CR Example**
|
||||
|
||||
```yaml
|
||||
# 🏭 ACTUAL CR EXAMPLE
|
||||
apiVersion: freeleaps.com/v1alpha1
|
||||
kind: DevOpsProject
|
||||
metadata:
|
||||
name: magicleaps-frontend
|
||||
namespace: freeleaps-devops-system
|
||||
labels:
|
||||
app.kubernetes.io/name: magicleaps-frontend
|
||||
app.kubernetes.io/instance: magicleaps-frontend
|
||||
spec:
|
||||
projectName: "Magicleaps Frontend"
|
||||
projectId: "magicleaps-frontend"
|
||||
git:
|
||||
url: "https://github.com/freeleaps/magicleaps-frontend"
|
||||
branch: "main"
|
||||
credentialsRef:
|
||||
name: "github-credentials"
|
||||
namespace: "freeleaps-devops-system"
|
||||
registry:
|
||||
url: "https://harbor.freeleaps.mathmast.com"
|
||||
project: "magicleaps"
|
||||
credentialsRef:
|
||||
name: "harbor-credentials"
|
||||
namespace: "freeleaps-devops-system"
|
||||
environments:
|
||||
- name: "production"
|
||||
domain: "magicleaps.mathmast.com"
|
||||
replicas: 3
|
||||
- name: "alpha"
|
||||
domain: "alpha.magicleaps.mathmast.com"
|
||||
replicas: 1
|
||||
```
|
||||
|
||||
### **3. Other Custom Resources**
|
||||
|
||||
```yaml
|
||||
# 🏭 YOUR COMPLETE CRD SET
|
||||
# From freeleaps-devops-reconciler/docs/design/one-click-deployment.md
|
||||
|
||||
# DevOpsProject - Main project configuration
|
||||
# ArgoSettings - ArgoCD settings for the DevOpsProject
|
||||
# JenkinsSettings - Jenkins settings and generated pipelines
|
||||
# ContainerRegistry - Container registry information
|
||||
# ContainerImage - Every image manufactured by Jenkins pipeline
|
||||
# DeploymentRecord - Track deployment records
|
||||
# GitCredential - Git repository credentials
|
||||
# IngressResource - Ingress configuration
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🤖 **How Your Operator Works**
|
||||
|
||||
### **🔄 Reconciliation Loop**
|
||||
|
||||
```python
|
||||
# 🏭 ACTUAL CODE FROM YOUR RECONCILER
|
||||
# freeleaps-devops-reconciler/reconciler/controllers/devops_projects/controller.py
|
||||
|
||||
@kopf.on.create(group=consts.GROUP, version=consts.VERSION, kind=consts.DEVOPS_PROJECT_KIND)
|
||||
def on_devops_proj_created(name: str, namespace: Optional[str], body: Body, logger: Logger, **kwargs):
|
||||
logger.info(f"Newly created DevOpsProject resource and named {name} in namespace {namespace}, start to reconciling...")
|
||||
|
||||
devops_proj = DevOpsProject(body)
|
||||
|
||||
try:
|
||||
devops_proj.parse_spec()
|
||||
devops_proj.get_spec().validate(logger)
|
||||
except SpecError as e:
|
||||
devops_proj.update_status({
|
||||
'devopsProject': {
|
||||
'status': DevOpsProjectDiagStatus.INVALID.value,
|
||||
'synced': False,
|
||||
'ready': False,
|
||||
'lastProbeTime': isotime(),
|
||||
}
|
||||
})
|
||||
devops_proj.error(action='CreateDevOpsProject',
|
||||
reason='InvalidSpecArgument', msg=str(e))
|
||||
raise kopf.TemporaryError(f"Error found in DevOpsProject spec: {e}")
|
||||
|
||||
# Create resource manager and handle the project
|
||||
resource_manager = DevOpsProjectResourceManager(namespace, logger)
|
||||
# ... implementation details
|
||||
```
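
For local debugging, kopf (the framework the reconciler is built on) can run a handler module directly against your current kubeconfig. A hedged sketch — the module path is an assumption based on the repository layout above, and running it alongside the in-cluster operator will cause duplicate handling:

```bash
# Run the DevOpsProject handlers locally with verbose logging (requires `pip install kopf`)
kopf run reconciler/controllers/devops_projects/controller.py \
  --namespace=freeleaps-devops-system --verbose
```
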
|
||||
|
||||
### **📊 Event Flow**
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ EVENT FLOW │
|
||||
│ │
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ User │ │ RabbitMQ │ │ Operator │ │ Kubernetes │ │
|
||||
│ │ Action │ │ Message │ │ Controller│ │ Resources │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
│ │ │ │ │ │
|
||||
│ │ 1. Create │ │ │ │
|
||||
│ │ Project │ │ │ │
|
||||
│ │───────────────▶│ │ │ │
|
||||
│ │ │ 2. DevOpsInit │ │ │
|
||||
│ │ │ Event │ │ │
|
||||
│ │ │───────────────▶│ │ │
|
||||
│ │ │ │ 3. Create CR │ │
|
||||
│ │ │ │───────────────▶│ │
|
||||
│ │ │ │ │ 4. CR Created │
|
||||
│ │ │ │ │◀───────────────│
|
||||
│ │ │ │ 5. Reconcile │ │
|
||||
│ │ │ │◀───────────────│ │
|
||||
│ │ │ │ 6. Create │ │
|
||||
│ │ │ │ Resources │ │
|
||||
│ │ │ │───────────────▶│ │
|
||||
│ │ │ │ │ 7. Resources │
|
||||
│ │ │ │ │ Ready │
|
||||
│ │ │ │ │◀───────────────│
|
||||
│ │ │ │ 8. Update │ │
|
||||
│ │ │ │ Status │ │
|
||||
│ │ │ │◀───────────────│ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎯 **Understanding the Relationship**
|
||||
|
||||
### **📊 Reconciler vs DevOps Repo**
|
||||
|
||||
| Component | Purpose | Location | Responsibility |
|
||||
|-----------|---------|----------|----------------|
|
||||
| **freeleaps-devops-reconciler** | **Kubernetes Operator** | `freeleaps-ops/freeleaps-devops-reconciler/` | Watches CRs, creates K8s resources |
|
||||
| **freeleaps.com Platform** | **Business Logic** | `freeleaps-service-hub/` | User interface, project management |
|
||||
| **RabbitMQ** | **Message Queue** | Infrastructure | Event communication |
|
||||
| **ArgoCD** | **GitOps** | `freeleaps-ops/cluster/manifests/` | Application deployment |
|
||||
| **Jenkins** | **CI/CD** | Infrastructure | Pipeline execution |
|
||||
|
||||
### **🔄 How They Work Together**
|
||||
|
||||
```yaml
|
||||
# 1. User creates project on freeleaps.com
|
||||
# 2. Platform sends DevOpsInit event to RabbitMQ
|
||||
# 3. Reconciler receives event and creates DevOpsProject CR
|
||||
# 4. Reconciler watches CR and creates:
|
||||
# - ArgoCD Application
|
||||
# - Jenkins Pipeline
|
||||
# - Harbor Repository
|
||||
# - Namespace and RBAC
|
||||
# 5. ArgoCD deploys the application
|
||||
# 6. Jenkins runs the pipeline
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 **Practical Examples**
|
||||
|
||||
### **1. Creating a DevOpsProject**
|
||||
|
||||
```bash
|
||||
# Create a DevOpsProject CR
|
||||
kubectl apply -f - <<EOF
|
||||
apiVersion: freeleaps.com/v1alpha1
|
||||
kind: DevOpsProject
|
||||
metadata:
|
||||
name: my-new-project
|
||||
namespace: freeleaps-devops-system
|
||||
spec:
|
||||
projectName: "My New Project"
|
||||
projectId: "my-new-project"
|
||||
git:
|
||||
url: "https://github.com/myorg/myproject"
|
||||
branch: "main"
|
||||
registry:
|
||||
url: "https://harbor.example.com"
|
||||
project: "myproject"
|
||||
environments:
|
||||
- name: "production"
|
||||
domain: "myproject.example.com"
|
||||
replicas: 2
|
||||
EOF
|
||||
|
||||
# Check the CR status
|
||||
kubectl get devopsprojects -n freeleaps-devops-system
|
||||
kubectl describe devopsproject my-new-project -n freeleaps-devops-system
|
||||
```
|
||||
|
||||
### **2. Monitoring the Operator**
|
||||
|
||||
```bash
|
||||
# Check operator logs
|
||||
kubectl logs -n freeleaps-devops-system deployment/freeleaps-devops-reconciler
|
||||
|
||||
# Check CR status
|
||||
kubectl get devopsprojects --all-namespaces
|
||||
kubectl get argosettings --all-namespaces
|
||||
kubectl get jenkinssettings --all-namespaces
|
||||
|
||||
# Check created resources
|
||||
kubectl get applications -n freeleaps-devops-system
|
||||
kubectl get namespaces | grep my-new-project
|
||||
```
|
||||
|
||||
### **3. Troubleshooting**
|
||||
|
||||
```bash
|
||||
# Check CRD installation
|
||||
kubectl get crd | grep freeleaps.com
|
||||
|
||||
# Check operator events
|
||||
kubectl get events -n freeleaps-devops-system --sort-by='.lastTimestamp'
|
||||
|
||||
# Check resource creation
|
||||
kubectl get all -n my-new-project
|
||||
kubectl describe devopsproject my-new-project -n freeleaps-devops-system
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎯 **Best Practices**
|
||||
|
||||
### **1. CRD Design**
|
||||
|
||||
```yaml
|
||||
# ✅ DO: Use clear, descriptive names
|
||||
apiVersion: freeleaps.com/v1alpha1
|
||||
kind: DevOpsProject # Clear, descriptive
|
||||
|
||||
# ❌ DON'T: Use generic names
|
||||
kind: Project # Too generic
|
||||
```
|
||||
|
||||
### **2. Validation**
|
||||
|
||||
```yaml
|
||||
# ✅ DO: Include validation in CRD
|
||||
spec:
|
||||
openAPIV3Schema:
|
||||
type: object
|
||||
required: ['spec']
|
||||
properties:
|
||||
spec:
|
||||
type: object
|
||||
required: ['projectName', 'projectId']
|
||||
properties:
|
||||
projectId:
|
||||
pattern: "^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"
|
||||
```
|
||||
|
||||
### **3. Status Management**
|
||||
|
||||
```yaml
|
||||
# ✅ DO: Include status in CR
|
||||
status:
|
||||
conditions:
|
||||
- type: Ready
|
||||
status: "True"
|
||||
reason: "ReconciliationSucceeded"
|
||||
message: "All resources created successfully"
|
||||
- type: Synced
|
||||
status: "True"
|
||||
reason: "ReconciliationSucceeded"
|
||||
message: "Spec has been reconciled"
|
||||
```
|
||||
|
||||
### **4. Error Handling**
|
||||
|
||||
```python
|
||||
# ✅ DO: Proper error handling
|
||||
try:
|
||||
# Create resources
|
||||
resource_manager.create_resources()
|
||||
except Exception as e:
|
||||
# Update status with error
|
||||
devops_proj.update_status({
|
||||
'status': 'Error',
|
||||
'message': str(e)
|
||||
})
|
||||
raise kopf.TemporaryError(f"Failed to create resources: {e}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔍 **Debugging Your Operator**
|
||||
|
||||
### **1. Check Operator Status**
|
||||
|
||||
```bash
|
||||
# Check if operator is running
|
||||
kubectl get pods -n freeleaps-devops-system -l app=freeleaps-devops-reconciler
|
||||
|
||||
# Check operator logs
|
||||
kubectl logs -n freeleaps-devops-system deployment/freeleaps-devops-reconciler -f
|
||||
|
||||
# Check CRD installation
|
||||
kubectl get crd devopsprojects.freeleaps.com
|
||||
```
|
||||
|
||||
### **2. Check CR Status**
|
||||
|
||||
```bash
|
||||
# Check CR status
|
||||
kubectl get devopsprojects --all-namespaces -o wide
|
||||
|
||||
# Check CR events
|
||||
kubectl describe devopsproject <project-name> -n <namespace>
|
||||
|
||||
# Check CR YAML
|
||||
kubectl get devopsproject <project-name> -n <namespace> -o yaml
|
||||
```
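
If you only care about the high-level conditions (the `Ready`/`Synced` conditions shown under Best Practices below), a jsonpath query keeps the output compact. The exact status layout is whatever the reconciler writes, so treat this as a sketch:

```bash
# Print just the status conditions of a DevOpsProject
kubectl get devopsproject <project-name> -n <namespace> \
  -o jsonpath='{range .status.conditions[*]}{.type}={.status} ({.reason}){"\n"}{end}'
```
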
|
||||
|
||||
### **3. Check Created Resources**
|
||||
|
||||
```bash
|
||||
# Check what resources were created
|
||||
kubectl get all -n <project-namespace>
|
||||
|
||||
# Check ArgoCD applications
|
||||
kubectl get applications -n freeleaps-devops-system
|
||||
|
||||
# Check Jenkins pipelines
|
||||
kubectl get jenkinssettings --all-namespaces
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📚 **Next Steps**
|
||||
|
||||
### **1. Learn More About Operators**
|
||||
- [Kubernetes Operators](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/)
|
||||
- [Kopf Framework](https://kopf.readthedocs.io/) (what your reconciler uses)
|
||||
- [Operator SDK](https://sdk.operatorframework.io/)
|
||||
|
||||
### **2. Understand Your Architecture**
|
||||
- Study your `freeleaps-devops-reconciler` code
|
||||
- Understand the event flow from RabbitMQ
|
||||
- Learn how CRs trigger resource creation
|
||||
|
||||
### **3. Extend Your Operator**
|
||||
- Add new custom resources
|
||||
- Implement new controllers
|
||||
- Add validation and error handling
|
||||
|
||||
---
|
||||
|
||||
**Last Updated**: September 3, 2025
|
||||
**Version**: 1.0
|
||||
**Maintainer**: Infrastructure Team
|
||||
docs/Ingress_Setup_And_Redirects_Guide.md (new file, 558 lines)
@@ -0,0 +1,558 @@

# Ingress Setup & Redirects Guide
|
||||
|
||||
## 🎯 **Overview**
|
||||
|
||||
This guide covers advanced ingress configuration, redirects, and routing patterns. Building on your existing `Current_Ingress_Analysis.md`, this focuses on practical setup and common patterns.
|
||||
|
||||
---
|
||||
|
||||
## 📊 **Ingress Setup Process**
|
||||
|
||||
### **Step 1: Install Ingress Controller**
|
||||
|
||||
```bash
|
||||
# Install nginx-ingress controller
|
||||
kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.12.0/deploy/static/provider/cloud/deploy.yaml
|
||||
|
||||
# Verify installation
|
||||
kubectl get pods -n ingress-nginx
|
||||
kubectl get service ingress-nginx-controller -n ingress-nginx
|
||||
```
|
||||
|
||||
### **Step 2: Configure DNS**
|
||||
|
||||
```bash
|
||||
# Your actual Azure Load Balancer IP
|
||||
# IP: 4.155.160.32 (prod-usw2-k8s-freeleaps-lb-fe-ip)
|
||||
|
||||
# Add DNS records:
|
||||
# argo.mathmast.com → 4.155.160.32
|
||||
# gitea.freeleaps.mathmast.com → 4.155.160.32
|
||||
# magicleaps.mathmast.com → 4.155.160.32
|
||||
# alpha.magicleaps.mathmast.com → 4.155.160.32
|
||||
```
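
Once the records exist, confirm that every hostname actually resolves to the load balancer before requesting certificates; a quick check:

```bash
# Verify each hostname resolves to the load balancer IP (4.155.160.32)
for host in argo.mathmast.com gitea.freeleaps.mathmast.com \
            magicleaps.mathmast.com alpha.magicleaps.mathmast.com; do
  echo -n "$host -> "
  dig +short "$host" | tail -n1
done
```
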
|
||||
|
||||
### **Step 3: Setup Cert-Manager**
|
||||
|
||||
```bash
|
||||
# Install cert-manager
|
||||
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.0/cert-manager.yaml
|
||||
|
||||
# Verify installation
|
||||
kubectl get pods -n cert-manager
|
||||
```
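
After cert-manager is running, confirm that a ClusterIssuer exists and reports Ready. In this cluster the issuer is `mathmast-dot-com` (see `Current_Ingress_Analysis.md`); substitute whichever issuer name you use:

```bash
# Check that the ClusterIssuer has been created and is Ready
kubectl get clusterissuers
kubectl describe clusterissuer mathmast-dot-com | grep -A 5 "Conditions"
```
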
|
||||
|
||||
---
|
||||
|
||||
## 🔄 **Redirect Patterns**
|
||||
|
||||
### **1. HTTP to HTTPS Redirect**
|
||||
|
||||
```yaml
|
||||
# ✅ BEST PRACTICE: Force HTTPS
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: secure-app-ingress
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
cert-manager.io/cluster-issuer: "letsencrypt-prod"
|
||||
spec:
|
||||
tls:
|
||||
- hosts:
|
||||
- myapp.example.com
|
||||
secretName: myapp-tls
|
||||
rules:
|
||||
- host: myapp.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: myapp-service
|
||||
port:
|
||||
number: 80
|
||||
```
|
||||
|
||||
### **2. Path-Based Redirects**
|
||||
|
||||
```yaml
|
||||
# Redirect /old to /new
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: redirect-ingress
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/rewrite-target: /new
|
||||
nginx.ingress.kubernetes.io/configuration-snippet: |
|
||||
location /old {
|
||||
return 301 /new;
|
||||
}
|
||||
spec:
|
||||
rules:
|
||||
- host: myapp.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /old
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: myapp-service
|
||||
port:
|
||||
number: 80
|
||||
```
|
||||
|
||||
### **3. Domain Redirects**
|
||||
|
||||
```yaml
|
||||
# Redirect old domain to new domain
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: domain-redirect
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/configuration-snippet: |
|
||||
if ($host = "old.example.com") {
|
||||
return 301 https://new.example.com$request_uri;
|
||||
}
|
||||
spec:
|
||||
rules:
|
||||
- host: old.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: myapp-service
|
||||
port:
|
||||
number: 80
|
||||
```
|
||||
|
||||
### **4. Subdomain Redirects**
|
||||
|
||||
```yaml
|
||||
# Redirect www to non-www
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: www-redirect
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/configuration-snippet: |
|
||||
if ($host = "www.example.com") {
|
||||
return 301 https://example.com$request_uri;
|
||||
}
|
||||
spec:
|
||||
rules:
|
||||
- host: www.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: myapp-service
|
||||
port:
|
||||
number: 80
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎯 **Advanced Routing Patterns**
|
||||
|
||||
### **1. API Versioning**
|
||||
|
||||
```yaml
|
||||
# Route different API versions
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: api-versioning
|
||||
spec:
|
||||
rules:
|
||||
- host: api.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /v1
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: api-v1-service
|
||||
port:
|
||||
number: 8080
|
||||
- path: /v2
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: api-v2-service
|
||||
port:
|
||||
number: 8080
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: api-latest-service
|
||||
port:
|
||||
number: 8080
|
||||
```
|
||||
|
||||
### **2. Blue-Green Deployment**
|
||||
|
||||
```yaml
|
||||
# Route traffic between blue and green deployments
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: blue-green-routing
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/configuration-snippet: |
|
||||
# Illustrative request-based routing only; for a true weighted split,
# prefer the ingress-nginx canary annotations (nginx.ingress.kubernetes.io/canary, canary-weight)
|
||||
set $upstream "";
|
||||
if ($request_uri ~ "^/blue") {
|
||||
set $upstream "blue-service:8080";
|
||||
}
|
||||
if ($request_uri ~ "^/green") {
|
||||
set $upstream "green-service:8080";
|
||||
}
|
||||
if ($upstream = "") {
|
||||
# Default routing logic
|
||||
set $random $remote_addr;
|
||||
if ($random ~ "^[0-9]$") {
|
||||
set $upstream "blue-service:8080";
|
||||
}
|
||||
if ($random ~ "^[a-f]$") {
|
||||
set $upstream "green-service:8080";
|
||||
}
|
||||
}
|
||||
spec:
|
||||
rules:
|
||||
- host: myapp.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: blue-service
|
||||
port:
|
||||
number: 8080
|
||||
```
|
||||
|
||||
### **3. Geographic Routing**
|
||||
|
||||
```yaml
|
||||
# Route based on user location
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: geo-routing
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/configuration-snippet: |
|
||||
# Route US users to US cluster, EU users to EU cluster
|
||||
set $upstream "";
|
||||
if ($http_x_forwarded_for ~ "^.*\.(us|ca)") {
|
||||
set $upstream "us-service:8080";
|
||||
}
|
||||
if ($http_x_forwarded_for ~ "^.*\.(eu|uk|de|fr)") {
|
||||
set $upstream "eu-service:8080";
|
||||
}
|
||||
if ($upstream = "") {
|
||||
set $upstream "default-service:8080";
|
||||
}
|
||||
spec:
|
||||
rules:
|
||||
- host: myapp.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: default-service
|
||||
port:
|
||||
number: 8080
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 **Performance Optimizations**
|
||||
|
||||
### **1. Rate Limiting**
|
||||
|
||||
```yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: rate-limited-ingress
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/limit-rps: "100"
nginx.ingress.kubernetes.io/limit-burst-multiplier: "2"
nginx.ingress.kubernetes.io/limit-connections: "50"
|
||||
spec:
|
||||
rules:
|
||||
- host: api.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: api-service
|
||||
port:
|
||||
number: 8080
|
||||
```
|
||||
|
||||
### **2. Caching Headers**
|
||||
|
||||
```yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: cached-ingress
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/configuration-snippet: |
|
||||
location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg)$ {
|
||||
expires 1y;
|
||||
add_header Cache-Control "public, immutable";
|
||||
}
|
||||
spec:
|
||||
rules:
|
||||
- host: myapp.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: myapp-service
|
||||
port:
|
||||
number: 80
|
||||
```
|
||||
|
||||
### **3. Gzip Compression**
|
||||
|
||||
```yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: compressed-ingress
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/enable-cors: "true"
|
||||
nginx.ingress.kubernetes.io/cors-allow-methods: "GET, POST, PUT, DELETE, OPTIONS"
|
||||
nginx.ingress.kubernetes.io/cors-allow-origin: "*"
|
||||
nginx.ingress.kubernetes.io/configuration-snippet: |
|
||||
gzip on;
|
||||
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;
|
||||
spec:
|
||||
rules:
|
||||
- host: myapp.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: myapp-service
|
||||
port:
|
||||
number: 80
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🛡️ **Security Patterns**
|
||||
|
||||
### **1. IP Whitelisting**
|
||||
|
||||
```yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: whitelisted-ingress
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/whitelist-source-range: "10.0.0.0/8,192.168.0.0/16"
|
||||
spec:
|
||||
rules:
|
||||
- host: internal.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: internal-service
|
||||
port:
|
||||
number: 8080
|
||||
```
|
||||
|
||||
### **2. Basic Auth**
|
||||
|
||||
```yaml
|
||||
# Create secret for basic auth
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: basic-auth-secret
|
||||
type: Opaque
|
||||
data:
|
||||
auth: <base64-encoded-htpasswd-file>
|
||||
---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: auth-ingress
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/auth-type: basic
|
||||
nginx.ingress.kubernetes.io/auth-secret: basic-auth-secret
|
||||
spec:
|
||||
rules:
|
||||
- host: protected.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: protected-service
|
||||
port:
|
||||
number: 8080
|
||||
```
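
The `auth` key in the secret is an htpasswd file, and ingress-nginx expects that exact key name. One way to generate it (requires the `htpasswd` utility from apache2-utils; the username is a placeholder):

```bash
# Create the htpasswd file and load it into the secret referenced by the ingress
htpasswd -c auth admin-user
kubectl create secret generic basic-auth-secret --from-file=auth -n <namespace>
rm auth
```
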
|
||||
|
||||
### **3. CORS Configuration**
|
||||
|
||||
```yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: cors-ingress
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/enable-cors: "true"
|
||||
nginx.ingress.kubernetes.io/cors-allow-methods: "GET, POST, PUT, DELETE, OPTIONS"
|
||||
nginx.ingress.kubernetes.io/cors-allow-origin: "https://myapp.example.com"
|
||||
nginx.ingress.kubernetes.io/cors-allow-credentials: "true"
|
||||
spec:
|
||||
rules:
|
||||
- host: api.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: api-service
|
||||
port:
|
||||
number: 8080
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔍 **Monitoring & Debugging**
|
||||
|
||||
### **1. Ingress Status Check**
|
||||
|
||||
```bash
|
||||
# Check ingress status
|
||||
kubectl get ingress --all-namespaces
|
||||
kubectl describe ingress <ingress-name> -n <namespace>
|
||||
|
||||
# Check ingress controller logs
|
||||
kubectl logs -n ingress-nginx deployment/ingress-nginx-controller
|
||||
|
||||
# Check certificate status
|
||||
kubectl get certificates --all-namespaces
|
||||
kubectl describe certificate <cert-name> -n <namespace>
|
||||
```
|
||||
|
||||
### **2. Test Redirects**
|
||||
|
||||
```bash
|
||||
# Test HTTP to HTTPS redirect
|
||||
curl -I http://myapp.example.com
|
||||
# Should return: 301 Moved Permanently
|
||||
|
||||
# Test domain redirect
|
||||
curl -I http://old.example.com
|
||||
# Should return: 301 Moved Permanently
|
||||
|
||||
# Test path redirect
|
||||
curl -I http://myapp.example.com/old
|
||||
# Should return: 301 Moved Permanently
|
||||
```
|
||||
|
||||
### **3. Performance Testing**
|
||||
|
||||
```bash
|
||||
# Test response times
|
||||
curl -w "@curl-format.txt" -o /dev/null -s "https://myapp.example.com"
|
||||
|
||||
# Load testing
|
||||
ab -n 1000 -c 10 https://myapp.example.com/
|
||||
|
||||
# SSL certificate check
|
||||
openssl s_client -connect myapp.example.com:443 -servername myapp.example.com
|
||||
```
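
The `curl-format.txt` referenced above is not defined in this repo; here is one possible version, using standard curl `--write-out` variables:

```bash
# Create a simple timing template for curl --write-out
cat > curl-format.txt <<'EOF'
time_namelookup:    %{time_namelookup}s
time_connect:       %{time_connect}s
time_appconnect:    %{time_appconnect}s
time_starttransfer: %{time_starttransfer}s
time_total:         %{time_total}s
EOF
```
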
|
||||
|
||||
---
|
||||
|
||||
## 📚 **Best Practices**
|
||||
|
||||
### **1. Always Use HTTPS**
|
||||
```yaml
|
||||
# ✅ DO: Force HTTPS redirect
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
|
||||
# ❌ DON'T: Allow HTTP traffic
|
||||
# nginx.ingress.kubernetes.io/ssl-redirect: "false"
|
||||
```
|
||||
|
||||
### **2. Use Specific Path Types**
|
||||
```yaml
|
||||
# ✅ DO: Use specific path types
|
||||
pathType: Prefix # For /api/v1/*
|
||||
pathType: Exact # For exact matches
|
||||
pathType: ImplementationSpecific # For complex patterns
|
||||
|
||||
# ❌ DON'T: Use default path types without understanding
|
||||
```
|
||||
|
||||
### 3. Implement Health Checks
```yaml
# ✅ DO: Expose a health endpoint and wire it into the pod's readinessProbe;
# ingress-nginx only routes traffic to endpoints that report Ready
# (sketch; adjust the path and port to your application)
readinessProbe:
  httpGet:
    path: /health
    port: 8080
  periodSeconds: 30
```
|
||||
|
||||
### **4. Monitor Resource Usage**
|
||||
```bash
|
||||
# Monitor ingress controller resources
|
||||
kubectl top pods -n ingress-nginx
|
||||
|
||||
# Monitor ingress metrics
|
||||
kubectl get ingress --all-namespaces -o wide
|
||||
```

---

## 🎯 **Your Current Setup Analysis**

Based on your `Current_Ingress_Analysis.md`, you have:

### **✅ What's Working Well:**
- **Nginx Ingress Controller**: Production-grade setup
- **Cert-Manager**: Automatic SSL certificates
- **DNS01 Challenge**: Reliable certificate validation
- **Custom Controller**: Automated ingress creation
- **Performance Optimizations**: Timeouts and body size limits

### **🔧 Potential Improvements:**
1. **Rate Limiting**: Add rate limiting for API endpoints (see the sketch after this list)
2. **Caching**: Implement caching for static assets
3. **Monitoring**: Add ingress metrics and alerts
4. **Backup Ingress**: Consider a secondary ingress controller
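
A minimal sketch of what the rate-limiting improvement could look like, using standard ingress-nginx annotations; the values and the ingress they would be attached to are assumptions, not settings taken from your current configuration:

```yaml
# Example only: attach to the API ingress and tune the numbers for your traffic profile
metadata:
  annotations:
    nginx.ingress.kubernetes.io/limit-rps: "10"           # requests per second per client IP
    nginx.ingress.kubernetes.io/limit-connections: "20"   # concurrent connections per client IP
    nginx.ingress.kubernetes.io/limit-burst-multiplier: "5"
```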

---

**Last Updated**: September 3, 2025

**Version**: 1.0

**Maintainer**: Infrastructure Team
433
docs/Kubernetes_Bootstrap_Guide.md
Normal file
@ -0,0 +1,433 @@
# Kubernetes Bootstrap Guide
|
||||
|
||||
## 🎯 **Overview**
|
||||
|
||||
This guide explains how to bootstrap a complete Kubernetes cluster from scratch using Azure VMs and the `freeleaps-ops` repository. **Kubernetes does NOT create automatically** - you need to manually bootstrap the entire infrastructure.
|
||||
|
||||
## 📋 **Prerequisites**
|
||||
|
||||
### **1. Azure Infrastructure**
|
||||
- ✅ Azure VMs (already provisioned)
|
||||
- ✅ Network connectivity between VMs
|
||||
- ✅ Azure AD tenant configured
|
||||
- ✅ Resource group: `k8s`
|
||||
|
||||
### **2. Local Environment**
|
||||
- ✅ `freeleaps-ops` repository cloned
|
||||
- ✅ Ansible installed (`pip install ansible`)
|
||||
- ✅ Azure CLI installed and configured
|
||||
- ✅ SSH access to VMs
|
||||
|
||||
### **3. VM Requirements**
|
||||
- **Master Nodes**: 2+ VMs for control plane
|
||||
- **Worker Nodes**: 2+ VMs for workloads
|
||||
- **Network**: All VMs in same subnet
|
||||
- **OS**: Ubuntu 20.04+ recommended
|
||||
|
||||
---
|
||||
|
||||
## 🚀 **Step-by-Step Bootstrap Process**
|
||||
|
||||
### **Step 1: Verify Azure VMs**
|
||||
|
||||
```bash
|
||||
# Check VM status
|
||||
az vm list --resource-group k8s --query "[].{name:name,powerState:powerState,privateIP:privateIps}" -o table
|
||||
|
||||
# Ensure all VMs are running
|
||||
az vm start --resource-group k8s --name <vm-name>
|
||||
```
|
||||
|
||||
### **Step 2: Configure Inventory**
|
||||
|
||||
Edit the Ansible inventory file:
|
||||
|
||||
```bash
|
||||
cd freeleaps-ops
|
||||
vim cluster/ansible/manifests/inventory.ini
|
||||
```
|
||||
|
||||
**Example inventory structure:**
|
||||
```ini
|
||||
[all:vars]
|
||||
ansible_user=wwwadmin@mathmast.com
|
||||
ansible_ssh_common_args='-o StrictHostKeyChecking=no'
|
||||
|
||||
[kube_control_plane]
|
||||
prod-usw2-k8s-freeleaps-master-01 ansible_host=10.10.0.4 etcd_member_name=freeleaps-etcd-01 host_name=prod-usw2-k8s-freeleaps-master-01
|
||||
prod-usw2-k8s-freeleaps-master-02 ansible_host=10.10.0.5 etcd_member_name=freeleaps-etcd-02 host_name=prod-usw2-k8s-freeleaps-master-02
|
||||
|
||||
[kube_node]
|
||||
prod-usw2-k8s-freeleaps-worker-nodes-01 ansible_host=10.10.0.6 host_name=prod-usw2-k8s-freeleaps-worker-nodes-01
|
||||
prod-usw2-k8s-freeleaps-worker-nodes-02 ansible_host=10.10.0.7 host_name=prod-usw2-k8s-freeleaps-worker-nodes-02
|
||||
|
||||
[etcd]
|
||||
prod-usw2-k8s-freeleaps-master-01
|
||||
prod-usw2-k8s-freeleaps-master-02
|
||||
|
||||
[k8s_cluster:children]
|
||||
kube_control_plane
|
||||
kube_node
|
||||
```
|
||||
|
||||
### **Step 3: Test Connectivity**
|
||||
|
||||
```bash
|
||||
cd cluster/ansible/manifests
|
||||
ansible -i inventory.ini all -m ping -kK
|
||||
```
|
||||
|
||||
### **Step 4: Bootstrap Kubernetes Cluster**
|
||||
|
||||
```bash
|
||||
cd ../../3rd/kubespray
|
||||
ansible-playbook -i ../../cluster/ansible/manifests/inventory.ini ./cluster.yml -kK -b
|
||||
```
|
||||
|
||||
**What this does:**
|
||||
- Installs Docker/containerd on all nodes
|
||||
- Downloads Kubernetes binaries (v1.31.4)
|
||||
- Generates certificates and keys
|
||||
- Bootstraps etcd cluster
|
||||
- Starts Kubernetes control plane
|
||||
- Joins worker nodes
|
||||
- Configures Calico networking
|
||||
- Sets up OIDC authentication
|
||||
|
||||
### **Step 5: Get Kubeconfig**
|
||||
|
||||
```bash
|
||||
# Get kubeconfig from master node
|
||||
ssh wwwadmin@mathmast.com@10.10.0.4 "sudo cat /etc/kubernetes/admin.conf" > ~/.kube/config
|
||||
|
||||
# Test cluster access
|
||||
kubectl get nodes
|
||||
kubectl get pods -n kube-system
|
||||
```
|
||||
|
||||
### **Step 6: Deploy Infrastructure**
|
||||
|
||||
```bash
|
||||
cd ../../cluster/manifests
|
||||
|
||||
# Deploy in order
|
||||
kubectl apply -f freeleaps-controls-system/
|
||||
kubectl apply -f freeleaps-devops-system/
|
||||
kubectl apply -f freeleaps-monitoring-system/
|
||||
kubectl apply -f freeleaps-logging-system/
|
||||
kubectl apply -f freeleaps-data-platform/
|
||||
```
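
Because the components above are applied in dependency order, it can help to confirm a namespace is healthy before running the next `kubectl apply`. A small verification sketch, assuming the namespaces match the directory names being applied:

```bash
# Wait for all Deployments in a namespace to become Available before applying the next component
kubectl wait --for=condition=Available deployment --all \
  -n freeleaps-controls-system --timeout=300s

# Quick health check of the namespace
kubectl get pods -n freeleaps-controls-system
```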
|
||||
|
||||
### **Step 7: Setup Authentication**
|
||||
|
||||
```bash
|
||||
cd ../../cluster/bin
|
||||
./freeleaps-cluster-authenticator auth
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🤖 **Automated Bootstrap Script**
|
||||
|
||||
Use the provided bootstrap script for automated deployment:
|
||||
|
||||
```bash
|
||||
cd freeleaps-ops/docs
|
||||
./bootstrap-k8s-cluster.sh
|
||||
```
|
||||
|
||||
**Script Features:**
|
||||
- ✅ Prerequisites verification
|
||||
- ✅ Azure VM status check
|
||||
- ✅ Connectivity testing
|
||||
- ✅ Automated cluster bootstrap
|
||||
- ✅ Infrastructure deployment
|
||||
- ✅ Authentication setup
|
||||
- ✅ Status verification
|
||||
|
||||
**Usage Options:**
|
||||
```bash
|
||||
# Full bootstrap
|
||||
./bootstrap-k8s-cluster.sh
|
||||
|
||||
# Only verify prerequisites
|
||||
./bootstrap-k8s-cluster.sh --verify
|
||||
|
||||
# Only bootstrap cluster (skip infrastructure)
|
||||
./bootstrap-k8s-cluster.sh --bootstrap
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 **Manual Bootstrap Commands**
|
||||
|
||||
If you prefer manual control, here are the detailed commands:
|
||||
|
||||
### **1. Install Prerequisites**
|
||||
|
||||
```bash
|
||||
# Install Ansible
|
||||
pip install ansible
|
||||
|
||||
# Install Azure CLI
|
||||
curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
|
||||
|
||||
# Install kubectl
|
||||
curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
|
||||
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
|
||||
```
|
||||
|
||||
### **2. Configure Azure**
|
||||
|
||||
```bash
|
||||
# Login to Azure
|
||||
az login
|
||||
|
||||
# Set subscription
|
||||
az account set --subscription <subscription-id>
|
||||
```
|
||||
|
||||
### **3. Bootstrap Cluster**
|
||||
|
||||
```bash
|
||||
# Navigate to kubespray
|
||||
cd freeleaps-ops/3rd/kubespray
|
||||
|
||||
# Run cluster installation
|
||||
ansible-playbook -i ../../cluster/ansible/manifests/inventory.ini ./cluster.yml -kK -b
|
||||
```
|
||||
|
||||
### **4. Verify Installation**
|
||||
|
||||
```bash
|
||||
# Get kubeconfig
|
||||
ssh wwwadmin@mathmast.com@<master-ip> "sudo cat /etc/kubernetes/admin.conf" > ~/.kube/config
|
||||
|
||||
# Test cluster
|
||||
kubectl get nodes
|
||||
kubectl get pods -n kube-system
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔍 **Verification Steps**
|
||||
|
||||
### **1. Cluster Health**
|
||||
|
||||
```bash
|
||||
# Check nodes
|
||||
kubectl get nodes -o wide
|
||||
|
||||
# Check system pods
|
||||
kubectl get pods -n kube-system
|
||||
|
||||
# Check cluster info
|
||||
kubectl cluster-info
|
||||
```
|
||||
|
||||
### **2. Network Verification**
|
||||
|
||||
```bash
|
||||
# Check Calico pods
|
||||
kubectl get pods -n kube-system | grep calico
|
||||
|
||||
# Check network policies
|
||||
kubectl get networkpolicies --all-namespaces
|
||||
```
|
||||
|
||||
### **3. Authentication Test**
|
||||
|
||||
```bash
|
||||
# Test OIDC authentication
|
||||
kubectl auth whoami
|
||||
|
||||
# Check permissions
|
||||
kubectl auth can-i --list
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🚨 **Troubleshooting**
|
||||
|
||||
### **Common Issues**
|
||||
|
||||
#### **1. Ansible Connection Failed**
|
||||
```bash
|
||||
# Check VM status
|
||||
az vm show --resource-group k8s --name <vm-name> --query "powerState"
|
||||
|
||||
# Test SSH manually
|
||||
ssh wwwadmin@mathmast.com@<vm-ip>
|
||||
|
||||
# Check network security groups
|
||||
az network nsg rule list --resource-group k8s --nsg-name <nsg-name>
|
||||
```
|
||||
|
||||
#### **2. Cluster Bootstrap Failed**
|
||||
```bash
|
||||
# Check Ansible logs
|
||||
ansible-playbook -i inventory.ini cluster.yml -kK -b -vvv
|
||||
|
||||
# Check VM resources
|
||||
kubectl describe node <node-name>
|
||||
|
||||
# Check system pods
|
||||
kubectl get pods -n kube-system
|
||||
kubectl describe pod <pod-name> -n kube-system
|
||||
```
|
||||
|
||||
#### **3. Infrastructure Deployment Failed**
|
||||
```bash
|
||||
# Check CRDs
|
||||
kubectl get crd
|
||||
|
||||
# Check operator pods
|
||||
kubectl get pods --all-namespaces | grep operator
|
||||
|
||||
# Check events
|
||||
kubectl get events --all-namespaces --sort-by='.lastTimestamp'
|
||||
```
|
||||
|
||||
### **Recovery Procedures**
|
||||
|
||||
#### **If Bootstrap Fails**
|
||||
1. **Clean up failed installation**
|
||||
```bash
|
||||
# Reset VMs to clean state
|
||||
az vm restart --resource-group k8s --name <vm-name>
|
||||
```
|
||||
|
||||
2. **Retry bootstrap**
|
||||
```bash
|
||||
cd freeleaps-ops/3rd/kubespray
|
||||
ansible-playbook -i ../../cluster/ansible/manifests/inventory.ini ./cluster.yml -kK -b
|
||||
```
|
||||
|
||||
#### **If Infrastructure Deployment Fails**
|
||||
1. **Check prerequisites**
|
||||
```bash
|
||||
kubectl get nodes
|
||||
kubectl get pods -n kube-system
|
||||
```
|
||||
|
||||
2. **Redeploy components**
|
||||
```bash
|
||||
kubectl delete -f <component-directory>/
|
||||
kubectl apply -f <component-directory>/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📊 **Post-Bootstrap Verification**
|
||||
|
||||
### **1. Core Components**
|
||||
|
||||
```bash
|
||||
# ArgoCD
|
||||
kubectl get pods -n freeleaps-devops-system | grep argocd
|
||||
|
||||
# Cert-manager
|
||||
kubectl get pods -n freeleaps-controls-system | grep cert-manager
|
||||
|
||||
# Prometheus/Grafana
|
||||
kubectl get pods -n freeleaps-monitoring-system | grep prometheus
|
||||
kubectl get pods -n freeleaps-monitoring-system | grep grafana
|
||||
|
||||
# Logging
|
||||
kubectl get pods -n freeleaps-logging-system | grep loki
|
||||
```
|
||||
|
||||
### **2. Access Points**
|
||||
|
||||
```bash
|
||||
# ArgoCD UI
|
||||
kubectl port-forward svc/argocd-server -n freeleaps-devops-system 8080:80
|
||||
|
||||
# Grafana UI
|
||||
kubectl port-forward svc/kube-prometheus-stack-grafana -n freeleaps-monitoring-system 3000:80
|
||||
|
||||
# Kubernetes Dashboard
|
||||
kubectl port-forward svc/kubernetes-dashboard-kong-proxy -n freeleaps-infra-system 8443:443
|
||||
```
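
To log in to the ArgoCD UI exposed by the port-forward above, you will usually need the initial admin password. A sketch assuming the standard `argocd-initial-admin-secret` created by ArgoCD is still present in the cluster:

```bash
# Retrieve the initial ArgoCD admin password (the username is "admin")
kubectl -n freeleaps-devops-system get secret argocd-initial-admin-secret \
  -o jsonpath="{.data.password}" | base64 -d
```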
|
||||
|
||||
### **3. Authentication Setup**
|
||||
|
||||
```bash
|
||||
# Setup user authentication
|
||||
cd freeleaps-ops/cluster/bin
|
||||
./freeleaps-cluster-authenticator auth
|
||||
|
||||
# Test authentication
|
||||
kubectl auth whoami
|
||||
kubectl get nodes
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔒 **Security Considerations**
|
||||
|
||||
### **1. Network Security**
|
||||
- Ensure VMs are in private subnets
|
||||
- Configure network security groups properly
|
||||
- Use VPN or bastion host for access
|
||||
|
||||
### **2. Access Control**
|
||||
- Use Azure AD OIDC for authentication
|
||||
- Implement RBAC for authorization
|
||||
- Regular access reviews
|
||||
|
||||
### **3. Monitoring**
|
||||
- Enable audit logging
|
||||
- Monitor cluster health
|
||||
- Set up alerts
|
||||
|
||||
---
|
||||
|
||||
## 📚 **Next Steps**
|
||||
|
||||
### **1. Application Deployment**
|
||||
- Deploy applications via ArgoCD
|
||||
- Configure CI/CD pipelines
|
||||
- Set up monitoring and alerting
|
||||
|
||||
### **2. Maintenance**
|
||||
- Regular security updates
|
||||
- Backup etcd data
|
||||
- Monitor resource usage
|
||||
|
||||
### **3. Scaling**
|
||||
- Add more worker nodes
|
||||
- Configure auto-scaling
|
||||
- Optimize resource allocation
|
||||
|
||||
---
|
||||
|
||||
## 🆘 **Support**
|
||||
|
||||
### **Emergency Contacts**
|
||||
- **Infrastructure Team**: [Contact Information]
|
||||
- **Azure Support**: [Contact Information]
|
||||
- **Kubernetes Community**: [Contact Information]
|
||||
|
||||
### **Useful Commands**
|
||||
```bash
|
||||
# Cluster status
|
||||
kubectl get nodes
|
||||
kubectl get pods --all-namespaces
|
||||
|
||||
# Logs
|
||||
kubectl logs -n kube-system <pod-name>
|
||||
|
||||
# Events
|
||||
kubectl get events --all-namespaces --sort-by='.lastTimestamp'
|
||||
|
||||
# Resource usage
|
||||
kubectl top nodes
|
||||
kubectl top pods --all-namespaces
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
**Last Updated**: September 3, 2025
|
||||
**Version**: 1.0
|
||||
**Maintainer**: Infrastructure Team
|
||||
840
docs/Kubernetes_Core_Concepts_Guide.md
Normal file
840
docs/Kubernetes_Core_Concepts_Guide.md
Normal file
@ -0,0 +1,840 @@
|
||||
# Kubernetes Core Concepts Guide
|
||||
|
||||
## 🎯 **Overview**
|
||||
|
||||
This guide explains the fundamental Kubernetes concepts: **Pods**, **Namespaces**, and **Persistent Volume Claims (PVCs)**. These are the building blocks of your applications in Kubernetes.
|
||||
|
||||
---
|
||||
|
||||
## 🏭 **Your Codebase Usage Patterns**
|
||||
|
||||
Before diving into the concepts, here's what your codebase actually uses:
|
||||
|
||||
### **📋 Deployment Methods Used in Your Codebase**
|
||||
| Method | Used In Your Codebase | Example Location |
|
||||
|--------|----------------------|------------------|
|
||||
| **Helm Charts** | ✅ **Primary method** | `freeleaps-ops/freeleaps/helm-pkg/` |
|
||||
| **kubectl apply** | ✅ **Secondary method** | `freeleaps-devops-reconciler/scripts/deploy.sh` |
|
||||
| **kubectl run** | ❌ **Not used** | - |
|
||||
| **Direct YAML** | ✅ **For simple resources** | `freeleaps-ops/cluster/manifests/` |
|
||||
|
||||
### **🔧 Your Actual Commands**
|
||||
```bash
# Your codebase uses these patterns:
# ("helm upgrade --install" is the runnable form of the install/upgrade pattern)
helm upgrade --install <release> <chart> --namespace <namespace> -f <values.yaml>
kubectl apply -f <directory>/
kubectl get pods -n <namespace> -l app.kubernetes.io/name=<app-name>
```
|
||||
|
||||
---
|
||||
|
||||
## 📦 **1. Pods (The Smallest Unit)**
|
||||
|
||||
### **What is a Pod?**
|
||||
|
||||
A **Pod** is the smallest deployable unit in Kubernetes. Think of it as a "wrapper" that contains one or more containers.
|
||||
|
||||
### **Pod Characteristics**
|
||||
|
||||
- **Atomic Unit**: Pods are created, scheduled, and destroyed together
|
||||
- **Shared Network**: Containers in a Pod share the same IP address
|
||||
- **Shared Storage**: Containers can share volumes
|
||||
- **Lifecycle**: Pods are ephemeral (temporary)
|
||||
|
||||
### **Pod Structure**
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: my-app-pod
|
||||
namespace: default
|
||||
labels:
|
||||
app: my-app
|
||||
version: v1
|
||||
spec:
|
||||
containers:
|
||||
- name: app-container
|
||||
image: nginx:latest
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
requests:
|
||||
memory: "64Mi"
|
||||
cpu: "250m"
|
||||
limits:
|
||||
memory: "128Mi"
|
||||
cpu: "500m"
|
||||
```
|
||||
|
||||
### **Creating Pods**
|
||||
|
||||
#### **Method 1: Direct Pod Creation** ❌ **BAD PRACTICE - NOT USED IN YOUR CODEBASE**
|
||||
```bash
|
||||
# ❌ BAD PRACTICE: This method is NOT used in your codebase (and shouldn't be!)
|
||||
# Create a simple nginx pod
|
||||
kubectl run nginx-pod --image=nginx:latest --port=80
|
||||
|
||||
# Why this is BAD:
|
||||
# - Creates standalone Pods (no self-healing)
|
||||
# - No scaling capability
|
||||
# - No rolling updates
|
||||
# - No rollback capability
|
||||
# - No resource limits
|
||||
# - Not declarative
|
||||
|
||||
# ✅ GOOD PRACTICE: This method IS used in your codebase
|
||||
# Create from YAML file
|
||||
kubectl apply -f pod.yaml
|
||||
```
|
||||
|
||||
#### **Method 2: Using YAML File** ✅ **GOOD PRACTICE - USED IN YOUR CODEBASE**
|
||||
```yaml
|
||||
# 📚 EDUCATIONAL EXAMPLE (not from your codebase)
|
||||
# pod.yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: web-app
|
||||
labels:
|
||||
app: web
|
||||
spec:
|
||||
containers:
|
||||
- name: web
|
||||
image: nginx:latest
|
||||
ports:
|
||||
- containerPort: 80
|
||||
env:
|
||||
- name: ENVIRONMENT
|
||||
value: "production"
|
||||
```
|
||||
|
||||
#### **Method 3: Helm Charts** ✅ **BEST PRACTICE - PRIMARY METHOD IN YOUR CODEBASE**
|
||||
```yaml
|
||||
# 🏭 ACTUAL EXAMPLE FROM YOUR CODEBASE
|
||||
# freeleaps-ops/freeleaps/helm-pkg/freeleaps/templates/freeleaps/deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
app.kubernetes.io/name: "freeleaps"
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
name: "freeleaps"
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "freeleaps"
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
replicas: {{ .Values.freeleaps.replicas }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: "freeleaps"
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
spec:
|
||||
containers:
|
||||
- name: "freeleaps"
|
||||
image: "{{ .Values.freeleaps.image.registry }}/{{ .Values.freeleaps.image.repository }}:{{ .Values.freeleaps.image.tag }}"
|
||||
```
|
||||
|
||||
### **🎯 Best Practices for Pod Creation**
|
||||
|
||||
#### **❌ What NOT to Do**
|
||||
```bash
|
||||
# ❌ NEVER use kubectl run for production applications
|
||||
kubectl run my-app --image=my-app:latest --port=8080
|
||||
|
||||
# ❌ NEVER create standalone Pods for services
|
||||
kubectl run database --image=postgres:13 --port=5432
|
||||
|
||||
# ❌ NEVER use imperative commands for production
|
||||
kubectl run nginx --image=nginx:latest
|
||||
```
|
||||
|
||||
#### **✅ What TO Do**
|
||||
```bash
|
||||
# ✅ Use Deployments for applications
|
||||
kubectl create deployment my-app --image=my-app:latest
|
||||
|
||||
# ✅ Use Helm charts for complex applications
|
||||
helm install my-app ./my-app-chart --namespace my-app
|
||||
|
||||
# ✅ Use kubectl apply for declarative deployments
|
||||
kubectl apply -f deployment.yaml
|
||||
|
||||
# ✅ Use StatefulSets for databases
|
||||
kubectl apply -f statefulset.yaml
|
||||
```
|
||||
|
||||
#### **🔧 When `kubectl run` is Acceptable**
|
||||
```bash
|
||||
# ✅ OK: One-time debugging pods
|
||||
kubectl run debug-pod --image=busybox --rm -it --restart=Never -- nslookup my-service
|
||||
|
||||
# ✅ OK: Temporary testing
|
||||
kubectl run test-pod --image=nginx --rm -it --restart=Never -- curl http://my-service:80
|
||||
|
||||
# ✅ OK: Quick experiments (development only)
|
||||
kubectl run temp-pod --image=nginx --port=80
|
||||
```
|
||||
|
||||
### **Managing Pods**
|
||||
|
||||
```bash
|
||||
# List pods
|
||||
kubectl get pods
|
||||
kubectl get pods -n <namespace>
|
||||
|
||||
# Get detailed info
|
||||
kubectl describe pod <pod-name>
|
||||
|
||||
# View logs
|
||||
kubectl logs <pod-name>
|
||||
kubectl logs <pod-name> -f # Follow logs
|
||||
|
||||
# Execute commands in pod
|
||||
kubectl exec -it <pod-name> -- /bin/bash
|
||||
|
||||
# Delete pod
|
||||
kubectl delete pod <pod-name>
|
||||
```
|
||||
|
||||
### **Pod Lifecycle**
|
||||
|
||||
```bash
|
||||
# Check pod status
|
||||
kubectl get pods -o wide
|
||||
|
||||
# Common statuses:
|
||||
# - Pending: Pod is being scheduled
|
||||
# - Running: Pod is running
|
||||
# - Succeeded: Pod completed successfully
|
||||
# - Failed: Pod failed
|
||||
# - Unknown: Pod status unclear
|
||||
```
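
When a cluster has many Pods, you can filter directly on these phases instead of scanning the full list; a small sketch using the standard field selector:

```bash
# List only Pods that are still waiting to be scheduled or started
kubectl get pods --all-namespaces --field-selector=status.phase=Pending

# List Pods that have failed
kubectl get pods --all-namespaces --field-selector=status.phase=Failed
```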
|
||||
|
||||
---
|
||||
|
||||
## 🏢 **2. Namespaces (Logical Isolation)**
|
||||
|
||||
### **What is a Namespace?**
|
||||
|
||||
A **Namespace** is a way to divide cluster resources among multiple users, teams, or applications. It provides a scope for names.
|
||||
|
||||
### **Namespace Benefits**
|
||||
|
||||
- **Resource Isolation**: Separate resources logically
|
||||
- **Access Control**: Different permissions per namespace
|
||||
- **Resource Quotas**: Limit resource usage
|
||||
- **Network Policies**: Control network traffic
|
||||
|
||||
### **Default Namespaces**
|
||||
|
||||
```bash
|
||||
# View all namespaces
|
||||
kubectl get namespaces
|
||||
|
||||
# Default namespaces:
|
||||
# - default: User resources
|
||||
# - kube-system: System components
|
||||
# - kube-public: Public resources
|
||||
# - kube-node-lease: Node lease objects
|
||||
```
|
||||
|
||||
### **Creating Namespaces**
|
||||
|
||||
#### **Method 1: Command Line** ✅ **USED IN YOUR CODEBASE**
|
||||
```bash
# ✅ This method IS used in your codebase
# Create namespace
kubectl create namespace my-app

# ✅ This pattern IS used in your codebase
# Create with labels (note the -o yaml on "kubectl label" so the labeled object is piped on to apply)
kubectl create namespace my-app --dry-run=client -o yaml | \
  kubectl label --local -f - environment=production -o yaml | \
  kubectl apply -f -
```
|
||||
|
||||
#### **Method 2: YAML File** ✅ **USED IN YOUR CODEBASE**
|
||||
```yaml
|
||||
# 📚 EDUCATIONAL EXAMPLE (not from your codebase)
|
||||
# namespace.yaml
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: my-app
|
||||
labels:
|
||||
environment: production
|
||||
team: backend
|
||||
```
|
||||
|
||||
#### **Method 3: Helm Charts** ✅ **PRIMARY METHOD IN YOUR CODEBASE**
|
||||
```yaml
|
||||
# 🏭 ACTUAL EXAMPLE FROM YOUR CODEBASE
|
||||
# Your Helm charts automatically create namespaces
|
||||
# freeleaps-devops-reconciler/scripts/deploy.sh
|
||||
HELM_CMD+=(--namespace "$NAMESPACE")
|
||||
|
||||
# Create namespace if requested
|
||||
if [[ "$CREATE_NAMESPACE" == "true" && "$UPGRADE" != "true" ]]; then
|
||||
HELM_CMD+=(--create-namespace)
|
||||
fi
|
||||
```
|
||||
|
||||
### **Working with Namespaces**
|
||||
|
||||
```bash
|
||||
# Set default namespace
|
||||
kubectl config set-context --current --namespace=my-app
|
||||
|
||||
# Run command in specific namespace
|
||||
kubectl get pods -n my-app
|
||||
|
||||
# Create resource in namespace
|
||||
kubectl run nginx --image=nginx -n my-app
|
||||
|
||||
# Delete namespace (deletes all resources)
|
||||
kubectl delete namespace my-app
|
||||
```
|
||||
|
||||
### **Namespace Best Practices**
|
||||
|
||||
```yaml
|
||||
# 📚 EDUCATIONAL EXAMPLE (not from your codebase)
|
||||
# Example: Production namespace setup
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: production
|
||||
labels:
|
||||
environment: production
|
||||
team: platform
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ResourceQuota
|
||||
metadata:
|
||||
name: production-quota
|
||||
namespace: production
|
||||
spec:
|
||||
hard:
|
||||
requests.cpu: "4"
|
||||
requests.memory: 8Gi
|
||||
limits.cpu: "8"
|
||||
limits.memory: 16Gi
|
||||
pods: "20"
|
||||
```
|
||||
|
||||
### **Your Actual Namespace Structure**
|
||||
```bash
|
||||
# 🏭 YOUR ACTUAL NAMESPACES
|
||||
kubectl get namespaces
|
||||
|
||||
# Your codebase uses these namespaces:
|
||||
# - freeleaps-controls-system (ingress, cert-manager)
|
||||
# - freeleaps-devops-system (ArgoCD)
|
||||
# - freeleaps-prod (Gitea)
|
||||
# - magicleaps (main application)
|
||||
# - freeleaps-alpha (testing)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 💾 **3. Persistent Volume Claims (PVCs)**
|
||||
|
||||
### **What is a PVC?**
|
||||
|
||||
A **Persistent Volume Claim (PVC)** is a request for storage by a user. It's like a "storage reservation" that provides persistent storage to Pods.
|
||||
|
||||
### **Storage Concepts**
|
||||
|
||||
- **Persistent Volume (PV)**: The actual storage resource
|
||||
- **Persistent Volume Claim (PVC)**: A request for storage
|
||||
- **Storage Class**: Defines the type of storage
|
||||
|
||||
### **PVC Structure**
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: my-app-storage
|
||||
namespace: my-app
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce # Single node read/write
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
||||
storageClassName: managed-premium # Azure Premium SSD
|
||||
```
|
||||
|
||||
### **Creating PVCs**
|
||||
|
||||
#### **Method 1: Command Line** ✅ **USED IN YOUR CODEBASE**
|
||||
```bash
|
||||
# ✅ This method IS used in your codebase
|
||||
# Create PVC
|
||||
kubectl create -f pvc.yaml
|
||||
|
||||
# ✅ This pattern IS used in your codebase
|
||||
# Create with kubectl
|
||||
kubectl apply -f - <<EOF
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: app-data
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 5Gi
|
||||
EOF
|
||||
```
|
||||
|
||||
#### **Method 2: YAML File** ✅ **USED IN YOUR CODEBASE**
|
||||
```yaml
|
||||
# 📚 EDUCATIONAL EXAMPLE (not from your codebase)
|
||||
# pvc.yaml
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: database-storage
|
||||
namespace: my-app
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 20Gi
|
||||
storageClassName: managed-premium
|
||||
```
|
||||
|
||||
#### **Method 3: Helm Charts** ✅ **PRIMARY METHOD IN YOUR CODEBASE**
|
||||
```yaml
|
||||
# 🏭 ACTUAL EXAMPLE FROM YOUR CODEBASE
|
||||
# freeleaps-ops/freeleaps/helm-pkg/3rd/gitea/values.prod.yaml
|
||||
persistence:
|
||||
enabled: true
|
||||
create: true
|
||||
mount: true
|
||||
claimName: freeleaps-prod-gitea-shared-storage
|
||||
size: 8Gi
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
annotations:
|
||||
helm.sh/resource-policy: keep
|
||||
```
|
||||
|
||||
### **Using PVCs in Pods**
|
||||
|
||||
```yaml
|
||||
# 📚 EDUCATIONAL EXAMPLE (not from your codebase)
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: app-with-storage
|
||||
namespace: my-app
|
||||
spec:
|
||||
containers:
|
||||
- name: app
|
||||
image: my-app:latest
|
||||
volumeMounts:
|
||||
- name: app-storage
|
||||
mountPath: /app/data
|
||||
volumes:
|
||||
- name: app-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: database-storage
|
||||
```
|
||||
|
||||
### **Managing PVCs**
|
||||
|
||||
```bash
|
||||
# List PVCs
|
||||
kubectl get pvc
|
||||
kubectl get pvc -n my-app
|
||||
|
||||
# Get detailed info
|
||||
kubectl describe pvc <pvc-name>
|
||||
|
||||
# Delete PVC
|
||||
kubectl delete pvc <pvc-name>
|
||||
|
||||
# Check storage classes
|
||||
kubectl get storageclass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 **4. Practical Examples**
|
||||
|
||||
### **Example 1: Web Application with Database**
|
||||
|
||||
```yaml
|
||||
# 📚 EDUCATIONAL EXAMPLE (not from your codebase)
|
||||
# namespace.yaml
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: webapp
|
||||
---
|
||||
# database-pvc.yaml
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: database-storage
|
||||
namespace: webapp
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
||||
storageClassName: managed-premium
|
||||
---
|
||||
# database-pod.yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: database
|
||||
namespace: webapp
|
||||
labels:
|
||||
app: database
|
||||
spec:
|
||||
containers:
|
||||
- name: postgres
|
||||
image: postgres:13
|
||||
env:
|
||||
- name: POSTGRES_DB
|
||||
value: "myapp"
|
||||
- name: POSTGRES_PASSWORD
|
||||
value: "secret"
|
||||
ports:
|
||||
- containerPort: 5432
|
||||
volumeMounts:
|
||||
- name: db-storage
|
||||
mountPath: /var/lib/postgresql/data
|
||||
volumes:
|
||||
- name: db-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: database-storage
|
||||
---
|
||||
# webapp-pod.yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: webapp
|
||||
namespace: webapp
|
||||
labels:
|
||||
app: webapp
|
||||
spec:
|
||||
containers:
|
||||
- name: webapp
|
||||
image: my-webapp:latest
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
env:
|
||||
- name: DATABASE_URL
|
||||
value: "postgresql://postgres:secret@database:5432/myapp"
|
||||
```
|
||||
|
||||
### **Example 2: Multi-Container Pod**
|
||||
|
||||
```yaml
|
||||
# 📚 EDUCATIONAL EXAMPLE (not from your codebase)
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: app-with-sidecar
|
||||
namespace: my-app
|
||||
spec:
|
||||
containers:
|
||||
- name: main-app
|
||||
image: my-app:latest
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
volumeMounts:
|
||||
- name: shared-data
|
||||
mountPath: /app/data
|
||||
- name: log-collector
|
||||
image: fluentd:latest
|
||||
volumeMounts:
|
||||
- name: shared-data
|
||||
mountPath: /logs
|
||||
- name: config-volume
|
||||
mountPath: /etc/fluentd
|
||||
volumes:
|
||||
- name: shared-data
|
||||
emptyDir: {}
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: fluentd-config
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🛠️ **5. Management Commands**
|
||||
|
||||
### **Pod Management**
|
||||
|
||||
```bash
|
||||
# Create and manage pods
|
||||
kubectl run nginx --image=nginx:latest --port=80
|
||||
kubectl get pods
|
||||
kubectl describe pod nginx
|
||||
kubectl logs nginx
|
||||
kubectl exec -it nginx -- /bin/bash
|
||||
kubectl delete pod nginx
|
||||
|
||||
# Port forwarding
|
||||
kubectl port-forward nginx 8080:80
|
||||
|
||||
# Copy files
|
||||
kubectl cp local-file.txt nginx:/tmp/
|
||||
```
|
||||
|
||||
### **Namespace Management**
|
||||
|
||||
```bash
|
||||
# Create and manage namespaces
|
||||
kubectl create namespace my-app
|
||||
kubectl get namespaces
|
||||
kubectl get pods -n my-app
|
||||
kubectl config set-context --current --namespace=my-app
|
||||
kubectl delete namespace my-app
|
||||
```
|
||||
|
||||
### **PVC Management**
|
||||
|
||||
```bash
|
||||
# Create and manage PVCs
|
||||
kubectl apply -f pvc.yaml
|
||||
kubectl get pvc
|
||||
kubectl describe pvc my-pvc
|
||||
kubectl delete pvc my-pvc
|
||||
|
||||
# Check storage usage
|
||||
kubectl get pv
|
||||
kubectl get storageclass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📊 **6. Monitoring and Debugging**
|
||||
|
||||
### **Pod Health Checks**
|
||||
|
||||
```yaml
|
||||
# 📚 EDUCATIONAL EXAMPLE (not from your codebase)
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: healthy-app
|
||||
spec:
|
||||
containers:
|
||||
- name: app
|
||||
image: my-app:latest
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 8080
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 10
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /ready
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
```
|
||||
|
||||
### **Resource Monitoring**
|
||||
|
||||
```bash
|
||||
# Check resource usage
|
||||
kubectl top pods
|
||||
kubectl top nodes
|
||||
|
||||
# Check events
|
||||
kubectl get events --sort-by='.lastTimestamp'
|
||||
|
||||
# Check pod status
|
||||
kubectl get pods -o wide
|
||||
kubectl describe pod <pod-name>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔒 **7. Security Best Practices**
|
||||
|
||||
### **Pod Security**
|
||||
|
||||
```yaml
|
||||
# 📚 EDUCATIONAL EXAMPLE (not from your codebase)
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: secure-app
|
||||
spec:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
fsGroup: 2000
|
||||
containers:
|
||||
- name: app
|
||||
image: my-app:latest
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
```
|
||||
|
||||
### **Network Policies**
|
||||
|
||||
```yaml
|
||||
# 📚 EDUCATIONAL EXAMPLE (not from your codebase)
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
name: default-deny
|
||||
namespace: my-app
|
||||
spec:
|
||||
podSelector: {}
|
||||
policyTypes:
|
||||
- Ingress
|
||||
- Egress
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📚 **8. Next Steps**
|
||||
|
||||
### **Advanced Concepts**
|
||||
|
||||
- **Deployments**: Manage Pod replicas
|
||||
- **Services**: Expose Pods internally/externally
|
||||
- **ConfigMaps & Secrets**: Configuration management
|
||||
- **Jobs & CronJobs**: Batch processing
|
||||
- **StatefulSets**: Stateful applications
|
||||
|
||||
### **Best Practices**
|
||||
|
||||
1. **Don't create Pods directly** - Use Deployments
|
||||
2. **Use namespaces** for organization
|
||||
3. **Set resource limits** on all containers
|
||||
4. **Use health checks** for reliability
|
||||
5. **Implement security contexts**
|
||||
6. **Monitor resource usage**
|
||||
|
||||
---
|
||||
|
||||
## 🆘 **Troubleshooting**
|
||||
|
||||
### **Common Issues**
|
||||
|
||||
```bash
|
||||
# Pod stuck in Pending
|
||||
kubectl describe pod <pod-name>
|
||||
kubectl get events --sort-by='.lastTimestamp'
|
||||
|
||||
# PVC not bound
|
||||
kubectl describe pvc <pvc-name>
|
||||
kubectl get pv
|
||||
|
||||
# Namespace issues
|
||||
kubectl get namespaces
|
||||
kubectl describe namespace <namespace-name>
|
||||
```
|
||||
|
||||
### **Useful Commands**
|
||||
|
||||
```bash
|
||||
# Debug pod
|
||||
kubectl logs <pod-name>
|
||||
kubectl exec -it <pod-name> -- /bin/bash
|
||||
kubectl describe pod <pod-name>
|
||||
|
||||
# Check resources
|
||||
kubectl get all -n <namespace>
|
||||
kubectl get pvc,pv -n <namespace>
|
||||
kubectl get events -n <namespace>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🏭 **Appendix: Your Codebase Patterns**
|
||||
|
||||
### **Your Actual Deployment Commands**
|
||||
```bash
# 🏭 REAL COMMANDS FROM YOUR CODEBASE
# From freeleaps-devops-reconciler/scripts/deploy.sh

# Helm deployment (primary method); "helm upgrade --install" covers both install and upgrade
helm upgrade --install "$RELEASE_NAME" . \
  --namespace "$NAMESPACE" \
  --create-namespace \
  -f "$VALUES_FILE" \
  --set "image.tag=$IMAGE_TAG"

# kubectl apply (secondary method)
kubectl apply -f <directory>/

# Status checking
kubectl get pods -n "$NAMESPACE" -l "app.kubernetes.io/name=freeleaps-devops-reconciler"
kubectl logs -n "$NAMESPACE" deployment/"$RELEASE_NAME"
```
|
||||
|
||||
### **Your Actual Namespace Structure**
|
||||
```bash
|
||||
# 🏭 YOUR REAL NAMESPACES
|
||||
kubectl get namespaces
|
||||
|
||||
# Production namespaces:
|
||||
# - freeleaps-controls-system (ingress, cert-manager)
|
||||
# - freeleaps-devops-system (ArgoCD)
|
||||
# - freeleaps-prod (Gitea)
|
||||
# - magicleaps (main application)
|
||||
# - freeleaps-alpha (testing)
|
||||
```
|
||||
|
||||
### **Your Actual Storage Classes**
|
||||
```bash
|
||||
# 🏭 YOUR REAL STORAGE CLASSES
|
||||
kubectl get storageclass
|
||||
|
||||
# Azure storage classes used:
|
||||
# - managed-premium (SSD)
|
||||
# - managed-standard (HDD)
|
||||
# - azure-disk-std-lrs (standard disk)
|
||||
```
|
||||
|
||||
### **Your Actual Resource Naming Conventions**
|
||||
```yaml
|
||||
# 🏭 YOUR REAL NAMING PATTERNS
|
||||
# From freeleaps-service-hub deployment guidelines
|
||||
|
||||
# Resource naming: {APP_NAME}-{RESOURCE_NAME}
|
||||
# Examples:
|
||||
# - payment-deployment
|
||||
# - payment-service
|
||||
# - payment-configmap
|
||||
|
||||
# Namespace: same as repository name
|
||||
# Examples:
|
||||
# - freeleaps-service-hub
|
||||
# - freeleaps-ops
|
||||
# - magicleaps
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
**Last Updated**: September 3, 2025
|
||||
**Version**: 1.0
|
||||
**Maintainer**: Infrastructure Team
|
||||
832
docs/Kubernetes_Fundamentals_For_Junior_Engineers.md
Normal file
832
docs/Kubernetes_Fundamentals_For_Junior_Engineers.md
Normal file
@ -0,0 +1,832 @@
|
||||
# Kubernetes Fundamentals for Junior Engineers
|
||||
|
||||
## 🎯 **Overview**
|
||||
|
||||
This guide is designed for junior engineers starting their DevOps journey. It covers the essential Kubernetes concepts you'll encounter daily, with practical examples and real-world scenarios.
|
||||
|
||||
---
|
||||
|
||||
## 📋 **Prerequisites**
|
||||
|
||||
Before diving into these concepts, make sure you understand:
|
||||
- ✅ **Pods**: Basic container units
|
||||
- ✅ **Namespaces**: Resource organization
|
||||
- ✅ **PVCs**: Persistent storage
|
||||
- ✅ **Basic kubectl commands**
|
||||
|
||||
---
|
||||
|
||||
## 🚀 **1. Deployments (The Right Way to Run Apps)**
|
||||
|
||||
### **Why Deployments?**
|
||||
|
||||
**Never create Pods directly!** Deployments are the standard way to run applications because they provide:
|
||||
- **Replicas**: Run multiple copies of your app
|
||||
- **Rolling updates**: Zero-downtime deployments
|
||||
- **Rollback**: Easy recovery from failed deployments
|
||||
- **Self-healing**: Automatically restart failed pods
|
||||
|
||||
### **Deployment Structure**
|
||||
|
||||
```yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: web-app
|
||||
namespace: my-app
|
||||
labels:
|
||||
app: web-app
|
||||
version: v1
|
||||
spec:
|
||||
replicas: 3 # Run 3 copies
|
||||
selector:
|
||||
matchLabels:
|
||||
app: web-app
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: web-app
|
||||
version: v1
|
||||
spec:
|
||||
containers:
|
||||
- name: web-app
|
||||
image: nginx:latest
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
requests:
|
||||
memory: "64Mi"
|
||||
cpu: "250m"
|
||||
limits:
|
||||
memory: "128Mi"
|
||||
cpu: "500m"
|
||||
```
|
||||
|
||||
### **Managing Deployments**
|
||||
|
||||
```bash
|
||||
# Create deployment
|
||||
kubectl apply -f deployment.yaml
|
||||
|
||||
# Check deployment status
|
||||
kubectl get deployments
|
||||
kubectl describe deployment web-app
|
||||
|
||||
# Scale deployment
|
||||
kubectl scale deployment web-app --replicas=5
|
||||
|
||||
# Update deployment (change image)
|
||||
kubectl set image deployment/web-app web-app=nginx:1.21
|
||||
|
||||
# Rollback to previous version
|
||||
kubectl rollout undo deployment/web-app
|
||||
|
||||
# Check rollout status
|
||||
kubectl rollout status deployment/web-app
|
||||
|
||||
# View rollout history
|
||||
kubectl rollout history deployment/web-app
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🌐 **2. Services (Network Communication)**
|
||||
|
||||
### **Why Services?**
|
||||
|
||||
Pods are ephemeral (temporary). Services provide:
|
||||
- **Stable IP addresses** for your applications
|
||||
- **Load balancing** across multiple pods
|
||||
- **Service discovery** within the cluster
|
||||
- **External access** to your applications
|
||||
|
||||
### **Service Types**
|
||||
|
||||
#### **ClusterIP (Internal Access)**
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: web-app-service
|
||||
namespace: my-app
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: web-app
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
||||
```
|
||||
|
||||
#### **NodePort (External Access via Node)**
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: web-app-nodeport
|
||||
namespace: my-app
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: web-app
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
nodePort: 30080 # Access via node IP:30080
|
||||
protocol: TCP
|
||||
```
|
||||
|
||||
#### **LoadBalancer (Cloud Load Balancer)**
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: web-app-lb
|
||||
namespace: my-app
|
||||
spec:
|
||||
type: LoadBalancer
|
||||
selector:
|
||||
app: web-app
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
||||
```
|
||||
|
||||
### **Managing Services**
|
||||
|
||||
```bash
|
||||
# Create service
|
||||
kubectl apply -f service.yaml
|
||||
|
||||
# List services
|
||||
kubectl get services
|
||||
kubectl get svc
|
||||
|
||||
# Get service details
|
||||
kubectl describe service web-app-service
|
||||
|
||||
# Test service connectivity
|
||||
kubectl run test-pod --image=busybox --rm -it --restart=Never -- wget -O- web-app-service:80
|
||||
|
||||
# Port forward for testing
|
||||
kubectl port-forward service/web-app-service 8080:80
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 **3. ConfigMaps & Secrets (Configuration Management)**
|
||||
|
||||
### **Why ConfigMaps & Secrets?**
|
||||
|
||||
Applications need configuration. These provide:
|
||||
- **Environment-specific settings** (dev, staging, prod)
|
||||
- **Secure credential storage**
|
||||
- **Configuration without rebuilding images**
|
||||
- **Centralized configuration management**
|
||||
|
||||
### **ConfigMaps**
|
||||
|
||||
#### **Creating ConfigMaps**
|
||||
|
||||
```bash
|
||||
# From literal values
|
||||
kubectl create configmap app-config \
|
||||
--from-literal=DB_HOST=postgres-service \
|
||||
--from-literal=DB_PORT=5432 \
|
||||
--from-literal=ENVIRONMENT=production
|
||||
|
||||
# From file
|
||||
kubectl create configmap app-config --from-file=config.properties
|
||||
|
||||
# From YAML
|
||||
kubectl apply -f configmap.yaml
|
||||
```
|
||||
|
||||
#### **ConfigMap YAML**
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: app-config
|
||||
namespace: my-app
|
||||
data:
|
||||
# Simple key-value pairs
|
||||
DB_HOST: "postgres-service"
|
||||
DB_PORT: "5432"
|
||||
ENVIRONMENT: "production"
|
||||
|
||||
# File-like content
|
||||
config.properties: |
|
||||
server.port=8080
|
||||
logging.level=INFO
|
||||
cache.enabled=true
|
||||
```
|
||||
|
||||
#### **Using ConfigMaps in Pods**
|
||||
```yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: web-app
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: web-app
|
||||
image: my-app:latest
|
||||
env:
|
||||
# Environment variables
|
||||
- name: DB_HOST
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: app-config
|
||||
key: DB_HOST
|
||||
- name: DB_PORT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: app-config
|
||||
key: DB_PORT
|
||||
volumeMounts:
|
||||
# Mount as files
|
||||
- name: config-volume
|
||||
mountPath: /app/config
|
||||
volumes:
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: app-config
|
||||
```
|
||||
|
||||
### **Secrets**
|
||||
|
||||
#### **Creating Secrets**
|
||||
|
||||
```bash
|
||||
# From literal values
|
||||
kubectl create secret generic db-secret \
|
||||
--from-literal=DB_USERNAME=admin \
|
||||
--from-literal=DB_PASSWORD=secret123
|
||||
|
||||
# From file
|
||||
kubectl create secret generic tls-secret \
|
||||
--from-file=tls.crt=cert.pem \
|
||||
--from-file=tls.key=key.pem
|
||||
```
|
||||
|
||||
#### **Secret YAML**
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: db-secret
|
||||
namespace: my-app
|
||||
type: Opaque
|
||||
data:
|
||||
# Base64 encoded values
|
||||
DB_USERNAME: YWRtaW4= # admin
|
||||
DB_PASSWORD: c2VjcmV0MTIz # secret123
|
||||
```
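
The `data` values above are base64-encoded, not encrypted. To produce (or check) the encoded values yourself:

```bash
# Encode values for the Secret's data field
echo -n 'admin' | base64        # YWRtaW4=
echo -n 'secret123' | base64    # c2VjcmV0MTIz

# Decode an existing value
echo 'YWRtaW4=' | base64 -d
```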
|
||||
|
||||
#### **Using Secrets in Pods**
|
||||
```yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: web-app
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: web-app
|
||||
image: my-app:latest
|
||||
env:
|
||||
- name: DB_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: db-secret
|
||||
key: DB_USERNAME
|
||||
- name: DB_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: db-secret
|
||||
key: DB_PASSWORD
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎯 **4. Ingress (External Access & Routing)**
|
||||
|
||||
### **Why Ingress?**
|
||||
|
||||
Ingress provides:
|
||||
- **URL-based routing** (example.com/api, example.com/web)
|
||||
- **SSL/TLS termination**
|
||||
- **Load balancing**
|
||||
- **Name-based virtual hosting**
|
||||
|
||||
### **Ingress Controller**
|
||||
|
||||
First, ensure you have an Ingress controller (like nginx-ingress):
|
||||
|
||||
```bash
|
||||
# Check if ingress controller exists
|
||||
kubectl get pods -n ingress-nginx
|
||||
|
||||
# If not, install nginx-ingress
|
||||
kubectl apply -f https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.8.2/deploy/static/provider/cloud/deploy.yaml
|
||||
```
|
||||
|
||||
### **Ingress Resource**
|
||||
|
||||
```yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: web-app-ingress
|
||||
namespace: my-app
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/rewrite-target: /
|
||||
cert-manager.io/cluster-issuer: "letsencrypt-prod"
|
||||
spec:
|
||||
tls:
|
||||
- hosts:
|
||||
- myapp.example.com
|
||||
secretName: myapp-tls
|
||||
rules:
|
||||
- host: myapp.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: web-app-service
|
||||
port:
|
||||
number: 80
|
||||
- path: /api
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: api-service
|
||||
port:
|
||||
number: 8080
|
||||
```
|
||||
|
||||
### **Managing Ingress**
|
||||
|
||||
```bash
|
||||
# Apply ingress
|
||||
kubectl apply -f ingress.yaml
|
||||
|
||||
# Check ingress status
|
||||
kubectl get ingress
|
||||
kubectl describe ingress web-app-ingress
|
||||
|
||||
# Test ingress
|
||||
curl -H "Host: myapp.example.com" http://your-cluster-ip/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔄 **5. Jobs & CronJobs (Batch Processing)**
|
||||
|
||||
### **Why Jobs & CronJobs?**
|
||||
|
||||
For tasks that need to:
|
||||
- **Run to completion** (not continuously)
|
||||
- **Execute on schedule** (daily backups, reports)
|
||||
- **Process data** (ETL jobs, batch processing)
|
||||
|
||||
### **Jobs**
|
||||
|
||||
```yaml
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: data-processing-job
|
||||
namespace: my-app
|
||||
spec:
|
||||
completions: 3 # Run 3 times
|
||||
parallelism: 2 # Run 2 in parallel
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: data-processor
|
||||
image: data-processor:latest
|
||||
command: ["python", "process_data.py"]
|
||||
env:
|
||||
- name: INPUT_FILE
|
||||
value: "/data/input.csv"
|
||||
- name: OUTPUT_FILE
|
||||
value: "/data/output.csv"
|
||||
volumeMounts:
|
||||
- name: data-volume
|
||||
mountPath: /data
|
||||
volumes:
|
||||
- name: data-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: data-pvc
|
||||
restartPolicy: Never
|
||||
```
|
||||
|
||||
### **CronJobs**
|
||||
|
||||
```yaml
|
||||
apiVersion: batch/v1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: daily-backup
|
||||
namespace: my-app
|
||||
spec:
|
||||
schedule: "0 2 * * *" # Daily at 2 AM
|
||||
jobTemplate:
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: backup
|
||||
image: backup-tool:latest
|
||||
command: ["/bin/bash", "-c"]
|
||||
args:
|
||||
- |
|
||||
echo "Starting backup at $(date)"
|
||||
pg_dump -h postgres-service -U admin mydb > /backup/backup-$(date +%Y%m%d).sql
|
||||
echo "Backup completed at $(date)"
|
||||
env:
|
||||
- name: PGPASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: db-secret
|
||||
key: DB_PASSWORD
|
||||
volumeMounts:
|
||||
- name: backup-volume
|
||||
mountPath: /backup
|
||||
volumes:
|
||||
- name: backup-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: backup-pvc
|
||||
restartPolicy: OnFailure
|
||||
```
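
Rather than waiting for the 2 AM schedule, you can trigger a one-off run from the CronJob to verify the backup logic; a quick sketch using the `daily-backup` CronJob defined above:

```bash
# Create a Job from the CronJob template and watch it run
kubectl create job --from=cronjob/daily-backup manual-backup-test -n my-app
kubectl get jobs -n my-app
kubectl logs job/manual-backup-test -n my-app
```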
|
||||
|
||||
### **Managing Jobs & CronJobs**
|
||||
|
||||
```bash
|
||||
# Create job
|
||||
kubectl apply -f job.yaml
|
||||
|
||||
# Check job status
|
||||
kubectl get jobs
|
||||
kubectl describe job data-processing-job
|
||||
|
||||
# View job logs
|
||||
kubectl logs job/data-processing-job
|
||||
|
||||
# Create cronjob
|
||||
kubectl apply -f cronjob.yaml
|
||||
|
||||
# Check cronjob status
|
||||
kubectl get cronjobs
|
||||
kubectl describe cronjob daily-backup
|
||||
|
||||
# Suspend cronjob
|
||||
kubectl patch cronjob daily-backup -p '{"spec" : {"suspend" : true}}'
|
||||
|
||||
# Resume cronjob
|
||||
kubectl patch cronjob daily-backup -p '{"spec" : {"suspend" : false}}'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📊 **6. Resource Management & Limits**
|
||||
|
||||
### **Why Resource Management?**
|
||||
|
||||
To prevent:
|
||||
- **Resource starvation** (one app consuming all CPU/memory)
|
||||
- **Node failures** (out of memory)
|
||||
- **Poor performance** (over-subscription)
|
||||
|
||||
### **Resource Requests & Limits**
|
||||
|
||||
```yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: resource-managed-app
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: app
|
||||
image: my-app:latest
|
||||
resources:
|
||||
requests:
|
||||
memory: "64Mi" # Minimum guaranteed
|
||||
cpu: "250m" # 0.25 CPU cores
|
||||
limits:
|
||||
memory: "128Mi" # Maximum allowed
|
||||
cpu: "500m" # 0.5 CPU cores
|
||||
```
|
||||
|
||||
### **Resource Quotas**
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: ResourceQuota
|
||||
metadata:
|
||||
name: namespace-quota
|
||||
namespace: my-app
|
||||
spec:
|
||||
hard:
|
||||
requests.cpu: "4" # 4 CPU cores total
|
||||
requests.memory: 8Gi # 8GB memory total
|
||||
limits.cpu: "8" # 8 CPU cores max
|
||||
limits.memory: 16Gi # 16GB memory max
|
||||
pods: "20" # 20 pods max
|
||||
services: "10" # 10 services max
|
||||
persistentvolumeclaims: "10" # 10 PVCs max
|
||||
```
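
A ResourceQuota caps the namespace as a whole, but it does not give individual containers default requests and limits. A LimitRange can fill that gap; this is an educational sketch in the same spirit as the examples above, not a manifest from your codebase:

```yaml
apiVersion: v1
kind: LimitRange
metadata:
  name: default-limits
  namespace: my-app
spec:
  limits:
  - type: Container
    defaultRequest:      # applied when a container sets no requests
      cpu: "250m"
      memory: "64Mi"
    default:             # applied when a container sets no limits
      cpu: "500m"
      memory: "128Mi"
```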
|
||||
|
||||
### **Managing Resources**
|
||||
|
||||
```bash
|
||||
# Check resource usage
|
||||
kubectl top pods
|
||||
kubectl top nodes
|
||||
|
||||
# Check quotas
|
||||
kubectl get resourcequota
|
||||
kubectl describe resourcequota namespace-quota
|
||||
|
||||
# Check resource requests/limits
|
||||
kubectl describe pod <pod-name> | grep -A 10 "Limits"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔍 **7. Monitoring & Debugging**
|
||||
|
||||
### **Essential Commands**
|
||||
|
||||
```bash
|
||||
# Check cluster health
|
||||
kubectl get nodes
|
||||
kubectl get pods --all-namespaces
|
||||
|
||||
# Check specific resources
|
||||
kubectl get deployments,services,pods -n my-app
|
||||
|
||||
# View logs
|
||||
kubectl logs <pod-name>
|
||||
kubectl logs <pod-name> -f # Follow logs
|
||||
kubectl logs <pod-name> --previous # Previous container
|
||||
|
||||
# Execute commands in pods
|
||||
kubectl exec -it <pod-name> -- /bin/bash
|
||||
kubectl exec <pod-name> -- ls /app
|
||||
|
||||
# Port forwarding for debugging
|
||||
kubectl port-forward <pod-name> 8080:80
|
||||
kubectl port-forward service/<service-name> 8080:80
|
||||
|
||||
# Check events
|
||||
kubectl get events --sort-by='.lastTimestamp'
|
||||
kubectl get events -n my-app
|
||||
|
||||
# Check resource usage
|
||||
kubectl top pods
|
||||
kubectl top nodes
|
||||
```
|
||||
|
||||
### **Common Debugging Scenarios**
|
||||
|
||||
#### **Pod Stuck in Pending**
|
||||
```bash
|
||||
# Check why pod can't be scheduled
|
||||
kubectl describe pod <pod-name>
|
||||
|
||||
# Check node resources
|
||||
kubectl describe node <node-name>
|
||||
|
||||
# Check events
|
||||
kubectl get events --sort-by='.lastTimestamp'
|
||||
```
|
||||
|
||||
#### **Pod Crashing**
|
||||
```bash
|
||||
# Check pod status
|
||||
kubectl get pods
|
||||
kubectl describe pod <pod-name>
|
||||
|
||||
# Check logs
|
||||
kubectl logs <pod-name>
|
||||
kubectl logs <pod-name> --previous
|
||||
|
||||
# Check resource usage
|
||||
kubectl top pod <pod-name>
|
||||
```
|
||||
|
||||
#### **Service Not Working**
|
||||
```bash
|
||||
# Check service endpoints
|
||||
kubectl get endpoints <service-name>
|
||||
|
||||
# Check service configuration
|
||||
kubectl describe service <service-name>
|
||||
|
||||
# Test service connectivity
|
||||
kubectl run test-pod --image=busybox --rm -it --restart=Never -- wget -O- <service-name>:<port>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔒 **8. Security Best Practices**
|
||||
|
||||
### **Pod Security**
|
||||
|
||||
```yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: secure-app
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
fsGroup: 2000
|
||||
containers:
|
||||
- name: app
|
||||
image: my-app:latest
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
volumeMounts:
|
||||
- name: tmp-volume
|
||||
mountPath: /tmp
|
||||
volumes:
|
||||
- name: tmp-volume
|
||||
emptyDir: {}
|
||||
```
|
||||
|
||||
### **Network Policies**
|
||||
|
||||
```yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
name: default-deny
|
||||
namespace: my-app
|
||||
spec:
|
||||
podSelector: {}
|
||||
policyTypes:
|
||||
- Ingress
|
||||
- Egress
|
||||
---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
name: allow-web-traffic
|
||||
namespace: my-app
|
||||
spec:
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app: web-app
|
||||
policyTypes:
|
||||
- Ingress
|
||||
ingress:
|
||||
- from:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
name: frontend
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 80
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📚 **9. Best Practices for Junior Engineers**
|
||||
|
||||
### **1. Always Use Deployments (Not Pods)**
|
||||
```bash
|
||||
# ❌ Don't do this
|
||||
kubectl run nginx --image=nginx
|
||||
|
||||
# ✅ Do this
|
||||
kubectl create deployment nginx --image=nginx
|
||||
```
|
||||
|
||||
### **2. Use Namespaces for Organization**
|
||||
```bash
|
||||
# Create namespaces for different environments
|
||||
kubectl create namespace development
|
||||
kubectl create namespace staging
|
||||
kubectl create namespace production
|
||||
```
|
||||
|
||||
### **3. Set Resource Limits**
|
||||
```yaml
|
||||
resources:
|
||||
requests:
|
||||
memory: "64Mi"
|
||||
cpu: "250m"
|
||||
limits:
|
||||
memory: "128Mi"
|
||||
cpu: "500m"
|
||||
```
|
||||
|
||||
### **4. Use Health Checks**
|
||||
```yaml
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 8080
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 10
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /ready
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
```
|
||||
|
||||
### **5. Use Labels and Selectors**
|
||||
```yaml
|
||||
metadata:
|
||||
labels:
|
||||
app: web-app
|
||||
version: v1
|
||||
environment: production
|
||||
team: backend
|
||||
```
|
||||
|
||||
### **6. Use ConfigMaps and Secrets**
|
||||
```bash
|
||||
# Store configuration externally
|
||||
kubectl create configmap app-config --from-literal=DB_HOST=postgres
|
||||
kubectl create secret generic db-secret --from-literal=DB_PASSWORD=secret123
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🚀 **10. Next Steps**
|
||||
|
||||
### **Advanced Concepts to Learn**
|
||||
- **StatefulSets**: For stateful applications (databases)
|
||||
- **DaemonSets**: For node-level services (monitoring agents)
|
||||
- **Horizontal Pod Autoscaler (HPA)**: Automatic scaling
|
||||
- **Vertical Pod Autoscaler (VPA)**: Resource optimization
|
||||
- **Pod Disruption Budgets**: Availability guarantees
|
||||
- **Pod Security Standards**: Security policies

### **Tools to Master**
- **Helm**: Package manager for Kubernetes
- **Kustomize**: Configuration management
- **ArgoCD**: GitOps deployment
- **Prometheus & Grafana**: Monitoring
- **Fluentd/Elasticsearch**: Logging

### **Practice Projects**
1. **Simple Web App**: Deploy nginx with a database
2. **API Service**: Deploy a REST API with authentication
3. **Batch Job**: Create a data processing pipeline
4. **Monitoring Stack**: Deploy Prometheus + Grafana
5. **CI/CD Pipeline**: Automate deployments

---

## 🆘 **Troubleshooting Quick Reference**

### **Common Issues & Solutions**

| Issue | Command | What to Check |
|-------|---------|---------------|
| Pod not starting | `kubectl describe pod <name>` | Events, resource limits |
| Service not working | `kubectl get endpoints <service>` | Pod labels, service selector |
| Deployment stuck | `kubectl rollout status deployment/<name>` | Image pull, resource limits |
| Ingress not working | `kubectl describe ingress <name>` | Ingress controller, TLS |
| High resource usage | `kubectl top pods` | Resource limits, memory leaks |
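
Putting the table above into a typical first-pass sequence for a misbehaving app (replace the placeholder names with your own):

```bash
# 1. Find pods that aren't Running
kubectl get pods -n <namespace> --field-selector=status.phase!=Running

# 2. Read the events and resource section for a broken pod
kubectl describe pod <pod-name> -n <namespace>

# 3. If the pod crashed, look at the logs of the previous attempt
kubectl logs <pod-name> -n <namespace> --previous

# 4. If the Service isn't routing, confirm it has endpoints
kubectl get endpoints <service-name> -n <namespace>
```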

### **Useful Aliases**
```bash
# Add to your .bashrc or .zshrc
alias k='kubectl'
alias kg='kubectl get'
alias kd='kubectl describe'
alias kl='kubectl logs'
alias ke='kubectl exec -it'
alias kp='kubectl port-forward'
```

---

**Last Updated**: September 3, 2025
**Version**: 1.0
**Maintainer**: Infrastructure Team
608
docs/PVC_Deep_Dive_Guide.md
Normal file
@ -0,0 +1,608 @@
|
||||
# PVC Deep Dive Guide: Understanding Persistent Storage in Kubernetes
|
||||
|
||||
## 🎯 **Overview**
|
||||
|
||||
This guide explains **Persistent Volume Claims (PVCs)** in detail, why they're essential, and how your current Kubernetes setup uses them. PVCs are crucial for applications that need to store data that survives pod restarts, crashes, or migrations.
|
||||
|
||||
---
|
||||
|
||||
## 📊 **How PVCs Work: Visual Explanation**
|
||||
|
||||
### **🔄 PVC Lifecycle Flow**
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ PVC LIFECYCLE │
|
||||
│ │
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ DEVELOPER │ │ PVC │ │ PV │ │ STORAGE │ │
|
||||
│ │ Creates │ │ Requests │ │ Provides │ │ Backend │ │
|
||||
│ │ PVC │ │ Storage │ │ Storage │ │ (Azure) │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
│ │ │ │ │ │
|
||||
│ │ 1. Create PVC │ │ │ │
|
||||
│ │───────────────▶│ │ │ │
|
||||
│ │ │ 2. Find PV │ │ │
|
||||
│ │ │───────────────▶│ │ │
|
||||
│ │ │ │ 3. Provision │ │
|
||||
│ │ │ │───────────────▶│ │
|
||||
│ │ │ │ │ 4. Create Disk │
|
||||
│ │ │ │ │◀───────────────│
|
||||
│ │ │ │ 5. Bind PV │ │
|
||||
│ │ │ │◀───────────────│ │
|
||||
│ │ │ 6. Bind PVC │ │ │
|
||||
│ │ │◀───────────────│ │ │
|
||||
│ │ 7. Ready │ │ │ │
|
||||
│ │◀───────────────│ │ │ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### **🏗️ Storage Architecture**
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ STORAGE ARCHITECTURE │
|
||||
│ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ KUBERNETES CLUSTER │ │
|
||||
│ │ │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||
│ │ │ POD 1 │ │ POD 2 │ │ POD 3 │ │ POD 4 │ │ │
|
||||
│ │ │ │ │ │ │ │ │ │ │ │
|
||||
│ │ │ ┌─────────┐ │ │ ┌─────────┐ │ │ ┌─────────┐ │ │ ┌─────────┐ │ │ │
|
||||
│ │ │ │ Volume │ │ │ │ Volume │ │ │ │ Volume │ │ │ │ Volume │ │ │ │
|
||||
│ │ │ │ Mount │ │ │ │ Mount │ │ │ │ Mount │ │ │ │ Mount │ │ │ │
|
||||
│ │ │ └─────────┘ │ │ └─────────┘ │ │ └─────────┘ │ │ └─────────┘ │ │ │
|
||||
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||
│ │ │ │ │ │ │ │
|
||||
│ │ └────────────────┼────────────────┼────────────────┘ │ │
|
||||
│ │ │ │ │ │
|
||||
│ │ ┌─────────────────────────────────────────────────────────────────────┐ │ │
|
||||
│ │ │ PVCs │ │ │
|
||||
│ │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │ │
|
||||
│ │ │ │ PVC: gitea │ │ PVC: mongo │ │ PVC: logs │ │ PVC: jenkins│ │ │ │ │
|
||||
│ │ │ │ 15Gi │ │ 8Gi │ │ 1Gi │ │ 50Gi │ │ │ │ │
|
||||
│ │ │ │ RWO │ │ RWO │ │ RWO │ │ RWO │ │ │ │ │
|
||||
│ │ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │ │
|
||||
│ │ └─────────────────────────────────────────────────────────────────────┘ │ │
|
||||
│ │ │ │ │
|
||||
│ │ ┌─────────────────────────────────────────────────────────────────────┐ │ │
|
||||
│ │ │ PVs │ │ │
|
||||
│ │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │ │
|
||||
│ │ │ │ PV: gitea │ │ PV: mongo │ │ PV: logs │ │ PV: jenkins │ │ │ │ │
|
||||
│ │ │ │ 15Gi │ │ 8Gi │ │ 1Gi │ │ 50Gi │ │ │ │ │
|
||||
│ │ │ │ azure-disk │ │ azure-disk │ │ azure-disk │ │ azure-disk │ │ │ │ │
|
||||
│ │ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │ │
|
||||
│ │ └─────────────────────────────────────────────────────────────────────┘ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ AZURE STORAGE BACKEND │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||
│ │ │ Managed Disk│ │ Managed Disk│ │ Managed Disk│ │ Managed Disk│ │ │
|
||||
│ │ │ 15Gi SSD │ │ 8Gi SSD │ │ 1Gi SSD │ │ 50Gi SSD │ │ │ │
|
||||
│ │ │ Premium │ │ Premium │ │ Standard │ │ Standard │ │ │ │
|
||||
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🤔 **Why Each Pod Needs PVC: The Data Persistence Problem**
|
||||
|
||||
### **❌ Without PVC: Data Loss Scenario**
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ WITHOUT PVC (BAD) │
|
||||
│ │
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ POD 1 │ │ POD 2 │ │ POD 3 │ │ POD 4 │ │
|
||||
│ │ nginx:latest│ │ nginx:latest│ │ nginx:latest│ │ nginx:latest│ │
|
||||
│ │ │ │ │ │ │ │ │ │
|
||||
│ │ ┌─────────┐ │ │ ┌─────────┐ │ │ ┌─────────┐ │ │ ┌─────────┐ │ │
|
||||
│ │ │ /tmp │ │ │ │ /tmp │ │ │ │ /tmp │ │ │ │ /tmp │ │ │
|
||||
│ │ │ (temp) │ │ │ │ (temp) │ │ │ │ (temp) │ │ │ │ (temp) │ │ │
|
||||
│ │ └─────────┘ │ │ └─────────┘ │ │ └─────────┘ │ │ └─────────┘ │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
│ │
|
||||
│ 🔄 Pod Restart/Delete → ❌ ALL DATA LOST │
|
||||
│ │
|
||||
│ ❌ User uploads gone │
|
||||
│ ❌ Database files gone │
|
||||
│ ❌ Configuration gone │
|
||||
│ ❌ Logs gone │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### **✅ With PVC: Data Persistence**
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ WITH PVC (GOOD) │
|
||||
│ │
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ POD 1 │ │ POD 2 │ │ POD 3 │ │ POD 4 │ │
|
||||
│ │ nginx:latest│ │ nginx:latest│ │ nginx:latest│ │ nginx:latest│ │
|
||||
│ │ │ │ │ │ │ │ │ │
|
||||
│ │ ┌─────────┐ │ │ ┌─────────┐ │ │ ┌─────────┐ │ │ ┌─────────┐ │ │
|
||||
│ │ │ /data │ │ │ │ /data │ │ │ │ /data │ │ │ │ /data │ │ │
|
||||
│ │ │ (PVC) │ │ │ │ (PVC) │ │ │ │ (PVC) │ │ │ │ (PVC) │ │ │
|
||||
│ │ └─────────┘ │ │ └─────────┘ │ │ └─────────┘ │ │ └─────────┘ │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
│ │ │ │ │ │
|
||||
│ └────────────────┼────────────────┼────────────────┘ │
|
||||
│ │ │ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ SHARED STORAGE │ │
|
||||
│ │ ┌─────────────────────────────────────────────────────────────┐ │ │
|
||||
│ │ │ 📁 /data │ │ │
|
||||
│ │ │ ├── 📄 user-uploads/ │ │ │
|
||||
│ │ │ ├── 📄 database/ │ │ │
|
||||
│ │ │ ├── 📄 config/ │ │ │
|
||||
│ │ │ └── 📄 logs/ │ │ │
|
||||
│ │ └─────────────────────────────────────────────────────────────┘ │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
│ │
|
||||
│ 🔄 Pod Restart/Delete → ✅ DATA PERSISTS │
|
||||
│ │
|
||||
│ ✅ User uploads preserved │
|
||||
│ ✅ Database files preserved │
|
||||
│ ✅ Configuration preserved │
|
||||
│ ✅ Logs preserved │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🏭 **Your Current Kubernetes Setup: PVC Analysis**
|
||||
|
||||
### **📊 Your Actual PVC Usage**
|
||||
|
||||
Based on your codebase analysis, here's how PVCs are currently used:
|
||||
|
||||
#### **1. Gitea (Git Repository)**
```yaml
# 🏭 ACTUAL CONFIGURATION FROM YOUR CODEBASE
# freeleaps-ops/freeleaps/helm-pkg/3rd/gitea/values.prod.yaml
persistence:
  enabled: true
  create: true
  mount: true
  claimName: gitea-shared-storage
  size: 15Gi
  accessModes:
    - ReadWriteOnce
  storageClass: azure-disk-std-lrs
  annotations:
    helm.sh/resource-policy: keep
```
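
To confirm the claim is actually bound on the cluster side (the Gitea namespace isn't shown in this snippet, so substitute your own):

```bash
# Check that the Gitea claim exists and is Bound
kubectl get pvc gitea-shared-storage -n <gitea-namespace>
kubectl describe pvc gitea-shared-storage -n <gitea-namespace>
```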
|
||||
|
||||
**What this means:**
|
||||
- ✅ **Gitea uses PVC** for storing repositories, user data, and configuration
|
||||
- ✅ **15GB storage** allocated for Git repositories and user data
|
||||
- ✅ **Azure Standard Disk** (cost-effective for this use case)
|
||||
- ✅ **ReadWriteOnce** - only one pod can access at a time
|
||||
- ✅ **Data persists** when Gitea pod restarts
|
||||
|
||||
#### **2. MongoDB (Database)**
|
||||
```yaml
|
||||
# 🏭 ACTUAL CONFIGURATION FROM YOUR CODEBASE
|
||||
# freeleaps-ops/freeleaps/helm-pkg/3rd/mongo/values.yaml
|
||||
persistence:
|
||||
enabled: true
|
||||
size: 8Gi
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
storageClass: "" # Uses default Azure storage class
|
||||
```
|
||||
|
||||
**What this means:**
|
||||
- ✅ **MongoDB uses PVC** for database files
|
||||
- ✅ **8GB storage** for database data
|
||||
- ✅ **Data persists** when MongoDB pod restarts
|
||||
- ✅ **Critical for data integrity**
|
||||
|
||||
#### **3. Jenkins (CI/CD)**
|
||||
```yaml
|
||||
# 🏭 ACTUAL CONFIGURATION FROM YOUR CODEBASE
|
||||
# freeleaps-ops/cluster/manifests/freeleaps-devops-system/jenkins/values.yaml
|
||||
persistence:
|
||||
enabled: true
|
||||
storageClass: azure-blob-fuse-2-std-lrs
|
||||
accessMode: "ReadWriteOnce"
|
||||
size: "50Gi"
|
||||
```
|
||||
|
||||
**What this means:**
|
||||
- ✅ **Jenkins uses PVC** for build artifacts, workspace data
|
||||
- ✅ **50GB storage** for build history and artifacts
|
||||
- ✅ **Azure Blob Storage** (cost-effective for large files)
|
||||
- ✅ **Build history preserved** across pod restarts
|
||||
|
||||
#### **4. Central Storage (Logs)**
|
||||
```yaml
|
||||
# 🏭 ACTUAL CONFIGURATION FROM YOUR CODEBASE
|
||||
# freeleaps-ops/freeleaps/helm-pkg/centralStorage/templates/central-storage/pvc.yaml
|
||||
persistence:
|
||||
enabled: true
|
||||
size: 1Gi
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
```
|
||||
|
||||
**What this means:**
|
||||
- ✅ **Central storage uses PVC** for log ingestion
|
||||
- ✅ **1GB storage** for log processing
|
||||
- ✅ **Logs preserved** during processing
|
||||
|
||||
### **📋 PVC Usage Summary**
|
||||
|
||||
| Application | PVC Name | Size | Storage Class | Purpose | Critical? |
|
||||
|-------------|----------|------|---------------|---------|-----------|
|
||||
| **Gitea** | `gitea-shared-storage` | 15Gi | `azure-disk-std-lrs` | Git repositories, user data | 🔴 **Critical** |
|
||||
| **MongoDB** | `mongodb-datadir` | 8Gi | Default | Database files | 🔴 **Critical** |
|
||||
| **Jenkins** | `jenkins-pvc` | 50Gi | `azure-blob-fuse-2-std-lrs` | Build artifacts, workspace | 🟡 **Important** |
|
||||
| **Central Storage** | `central-storage-logs-pvc` | 1Gi | Default | Log processing | 🟢 **Nice to have** |
|
||||
|
||||
---
|
||||
|
||||
## 🤷‍♂️ **Does Each Pod Need a PVC? NO!**
|
||||
|
||||
### **❌ Common Misconception**
|
||||
|
||||
**"Every pod needs a PVC"** - This is **WRONG**!
|
||||
|
||||
### **✅ Reality: PVCs Are Optional**
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ PVC DECISION TREE │
|
||||
│ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ DOES YOUR APP NEED PERSISTENT DATA? │ │
|
||||
│ │ │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ │ │
|
||||
│ │ │ YES │ │ NO │ │ │
|
||||
│ │ │ │ │ │ │ │
|
||||
│ │ │ ┌─────────┐ │ │ ┌─────────┐ │ │ │
|
||||
│ │ │ │ USE │ │ │ │ DON'T │ │ │ │
|
||||
│ │ │ │ PVC │ │ │ │ USE │ │ │ │
|
||||
│ │ │ │ │ │ │ │ PVC │ │ │ │
|
||||
│ │ │ └─────────┘ │ │ └─────────┘ │ │ │
|
||||
│ │ └─────────────┘ └─────────────┘ │ │
|
||||
│ │ │ │
|
||||
│ │ Examples: │ │
|
||||
│ │ • Databases (PostgreSQL, MongoDB) │ │
|
||||
│ │ • File storage (Gitea, Jenkins) │ │
|
||||
│ │ • Application data (user uploads) │ │
|
||||
│ │ • Logs (if you want to keep them) │ │
|
||||
│ │ │ │
|
||||
│ │ Examples: │ │
|
||||
│ │ • Web servers (nginx, static content) │ │
|
||||
│ │ • API servers (stateless applications) │ │
|
||||
│ │ • Cache servers (Redis, Memcached) │ │
|
||||
│ │ • Load balancers │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### **📊 Your Current Setup Analysis**
|
||||
|
||||
Looking at your applications:
|
||||
|
||||
#### **✅ Applications WITH PVCs (Need Persistent Data)**
|
||||
- **Gitea**: Git repositories, user data, configuration
|
||||
- **MongoDB**: Database files
|
||||
- **Jenkins**: Build artifacts, workspace data
|
||||
- **Central Storage**: Log processing
|
||||
|
||||
#### **❌ Applications WITHOUT PVCs (Stateless)**
|
||||
- **Nginx Ingress Controller**: Stateless routing
|
||||
- **ArgoCD**: GitOps configuration (stored in Git)
|
||||
- **Cert-manager**: Certificate management (stateless)
|
||||
- **Prometheus/Grafana**: Metrics (can use PVC for data retention)
|
||||
|
||||
---
|
||||
|
||||
## 🎯 **PVC Considerations: When to Use Them**
|
||||
|
||||
### **✅ Use PVCs When:**
|
||||
|
||||
#### **1. Database Applications**
|
||||
```yaml
|
||||
# Database needs persistent storage
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: postgres
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: postgres
|
||||
image: postgres:13
|
||||
volumeMounts:
|
||||
- name: db-storage
|
||||
mountPath: /var/lib/postgresql/data
|
||||
volumes:
|
||||
- name: db-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: postgres-pvc
|
||||
```
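
The Deployment above mounts a claim named `postgres-pvc` that isn't defined in the snippet. A minimal companion claim might look like the following — the size is a placeholder, not a value from this repo:

```bash
# Hypothetical companion PVC for the postgres example above
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: postgres-pvc
spec:
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
EOF
```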
|
||||
|
||||
#### **2. File Storage Applications**
|
||||
```yaml
|
||||
# File server needs persistent storage
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: file-server
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: file-server
|
||||
image: nginx:latest
|
||||
volumeMounts:
|
||||
- name: file-storage
|
||||
mountPath: /var/www/html
|
||||
volumes:
|
||||
- name: file-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: file-storage-pvc
|
||||
```
|
||||
|
||||
#### **3. Application Data**
|
||||
```yaml
|
||||
# Application needs to store user data
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: my-app
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: my-app
|
||||
image: my-app:latest
|
||||
volumeMounts:
|
||||
- name: app-data
|
||||
mountPath: /app/data
|
||||
volumes:
|
||||
- name: app-data
|
||||
persistentVolumeClaim:
|
||||
claimName: app-data-pvc
|
||||
```
|
||||
|
||||
### **❌ Don't Use PVCs When:**
|
||||
|
||||
#### **1. Stateless Web Servers**
|
||||
```yaml
|
||||
# Web server doesn't need persistent storage
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: web-server
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: web-server
|
||||
image: nginx:latest
|
||||
# No volumeMounts needed - stateless
|
||||
```
|
||||
|
||||
#### **2. API Servers**
|
||||
```yaml
|
||||
# API server doesn't need persistent storage
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: api-server
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: api-server
|
||||
image: my-api:latest
|
||||
# No volumeMounts needed - stateless
|
||||
```
|
||||
|
||||
#### **3. Cache Servers**
|
||||
```yaml
|
||||
# Cache server doesn't need persistent storage
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: redis-cache
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: redis
|
||||
image: redis:latest
|
||||
# No volumeMounts needed - cache is temporary
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 **PVC Configuration Options**

### **1. Access Modes**

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: my-pvc
spec:
  accessModes:
  - ReadWriteOnce     # Single node read/write (most common)
  # - ReadOnlyMany    # Multiple nodes read-only
  # - ReadWriteMany   # Multiple nodes read/write (rare)
  resources:
    requests:
      storage: 10Gi
```

### **2. Storage Classes**

```yaml
# Azure Storage Classes Available
storageClass: azure-disk-std-lrs         # Standard HDD (cheapest)
storageClass: azure-disk-premium-lrs     # Premium SSD (fastest)
storageClass: azure-blob-fuse-2-std-lrs  # Blob storage (for large files)
```
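
To see which of these classes actually exist in your cluster and to reference one from a raw claim (in a PVC manifest the field is `storageClassName`; the size here is illustrative):

```bash
# List the storage classes installed in the cluster
kubectl get storageclass

# Example claim pinned to the standard Azure disk class
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: example-std-disk-pvc
spec:
  storageClassName: azure-disk-std-lrs
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 5Gi
EOF
```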

### **3. Size Considerations**

```yaml
# Size your PVCs appropriately (one value per claim)
resources:
  requests:
    storage: 1Gi      # Small: logs, config
    # storage: 10Gi   # Medium: databases
    # storage: 100Gi  # Large: file storage, backups
```
|
||||
|
||||
---
|
||||
|
||||
## 🚨 **Common PVC Mistakes**
|
||||
|
||||
### **❌ Mistake 1: Using PVC for Everything**
|
||||
```yaml
|
||||
# ❌ DON'T DO THIS
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: nginx
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: nginx
|
||||
image: nginx:latest
|
||||
volumeMounts:
|
||||
- name: temp-storage # ❌ Unnecessary PVC
|
||||
mountPath: /tmp
|
||||
volumes:
|
||||
- name: temp-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: temp-pvc # ❌ Waste of resources
|
||||
```
|
||||
|
||||
### **❌ Mistake 2: Not Setting Resource Limits**
|
||||
```yaml
|
||||
# ❌ DON'T DO THIS
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: unlimited-pvc
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
# ❌ No size limit - could consume all storage
|
||||
```
|
||||
|
||||
### **✅ Correct Approach**
|
||||
```yaml
|
||||
# ✅ DO THIS
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: limited-pvc
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi # ✅ Set appropriate size
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📚 **Best Practices**
|
||||
|
||||
### **1. Size Appropriately**
|
||||
- Start small and scale up
|
||||
- Monitor actual usage
|
||||
- Use storage quotas
|
||||
|
||||
### **2. Choose Right Storage Class**
|
||||
- **Standard HDD**: Cost-effective for backups, logs
|
||||
- **Premium SSD**: Performance-critical databases
|
||||
- **Blob Storage**: Large files, archives
|
||||
|
||||
### **3. Use Labels and Annotations**
|
||||
```yaml
|
||||
metadata:
|
||||
name: my-pvc
|
||||
labels:
|
||||
app: my-app
|
||||
environment: production
|
||||
storage-type: database
|
||||
annotations:
|
||||
helm.sh/resource-policy: keep # Don't delete on helm uninstall
|
||||
```
|
||||
|
||||
### **4. Monitor Usage**
|
||||
```bash
|
||||
# Check PVC usage
|
||||
kubectl get pvc
|
||||
kubectl describe pvc <pvc-name>
|
||||
|
||||
# Check storage classes
|
||||
kubectl get storageclass
|
||||
|
||||
# Monitor disk usage in pods
|
||||
kubectl exec <pod-name> -- df -h
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔍 **Your Setup Recommendations**
|
||||
|
||||
### **Current State: Good!**
|
||||
Your current setup uses PVCs appropriately:
|
||||
- ✅ **Gitea**: 15Gi for repositories (appropriate)
|
||||
- ✅ **MongoDB**: 8Gi for database (appropriate)
|
||||
- ✅ **Jenkins**: 50Gi for builds (appropriate)
|
||||
- ✅ **Central Storage**: 1Gi for logs (appropriate)
|
||||
|
||||
### **Potential Improvements**
|
||||
1. **Monitor usage**: Check actual disk usage in these PVCs
|
||||
2. **Consider backups**: Implement PVC backup strategy
|
||||
3. **Storage quotas**: Set namespace storage limits
|
||||
4. **Performance tuning**: Use Premium SSD for databases if needed
|
||||
|
||||
---
|
||||
|
||||
## 📖 **Next Steps**
|
||||
|
||||
1. **Monitor your current PVCs**:
|
||||
```bash
|
||||
kubectl get pvc --all-namespaces
|
||||
kubectl describe pvc <pvc-name>
|
||||
```
|
||||
|
||||
2. **Check storage usage**:
|
||||
```bash
|
||||
kubectl exec -it <pod-name> -- df -h
|
||||
```
|
||||
|
||||
3. **Learn about backup strategies**:
|
||||
- Azure Backup for PVCs
|
||||
- Velero for Kubernetes backups
|
||||
|
||||
4. **Consider storage optimization**:
|
||||
- Right-size PVCs based on actual usage
|
||||
- Use appropriate storage classes for cost optimization
|
||||
|
||||
---
|
||||
|
||||
**Last Updated**: September 3, 2025
|
||||
**Version**: 1.0
|
||||
**Maintainer**: Infrastructure Team
|
||||
1055
docs/PostgreSQL_Gitea_Integration_Guide.md
Normal file
File diff suppressed because it is too large
413
docs/README.md
Normal file
@ -0,0 +1,413 @@
|
||||
# 🚀 FreeLeaps DevOps Learning Path for Junior Engineers
|
||||
|
||||
> **Production-Ready Kubernetes & DevOps Documentation**
|
||||
> *Your gateway to understanding our actual infrastructure and becoming a DevOps expert*
|
||||
|
||||
---
|
||||
|
||||
## 📋 **Table of Contents**
|
||||
|
||||
1. [🎯 **Quick Start Guide**](#-quick-start-guide)
|
||||
2. [🏗️ **Your Production Infrastructure**](#️-your-production-infrastructure)
|
||||
3. [📚 **Core Learning Materials**](#-core-learning-materials)
|
||||
4. [🔧 **Practical Exercises**](#-practical-exercises)
|
||||
5. [⚡ **Essential Commands**](#-essential-commands)
|
||||
6. [🎓 **Learning Path**](#-learning-path)
|
||||
7. [🔍 **Production Troubleshooting**](#-production-troubleshooting)
|
||||
8. [📖 **Additional Resources**](#-additional-resources)
|
||||
|
||||
---
|
||||
|
||||
## 🎯 **Quick Start Guide**
|
||||
|
||||
### **🚀 First Day Checklist**
|
||||
- [ ] **Access your production cluster**: `kubectl config use-context your-cluster`
|
||||
- [ ] **Explore the management UI**: [RabbitMQ Management UI](#rabbitmq-management-ui)
|
||||
- [ ] **Check ArgoCD**: Visit `https://argo.mathmast.com`
|
||||
- [ ] **Review monitoring**: Access Grafana dashboards
|
||||
- [ ] **Understand your apps**: Check `freeleaps-devops-reconciler` status
|
||||
|
||||
### **🔑 Essential Access Points**
|
||||
```bash
|
||||
# Your production cluster access
|
||||
kubectl config get-contexts
|
||||
kubectl get nodes -o wide
|
||||
|
||||
# Your actual services
|
||||
kubectl get svc -A | grep -E "(rabbitmq|argocd|jenkins|gitea)"
|
||||
|
||||
# Your actual namespaces
|
||||
kubectl get namespaces | grep freeleaps
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ **Your Production Infrastructure**
|
||||
|
||||
### **🌐 Production Domains & Services**
|
||||
|
||||
| **Service** | **Production URL** | **Purpose** | **Access** |
|
||||
|-------------|-------------------|-------------|------------|
|
||||
| **ArgoCD** | `https://argo.mathmast.com` | GitOps deployment | Web UI |
|
||||
| **Gitea** | `https://gitea.freeleaps.mathmast.com` | Git repository | Web UI |
|
||||
| **Jenkins** | `http://jenkins.freeleaps.mathmast.com` | CI/CD pipelines | Web UI (Internal access only) |
|
||||
| **RabbitMQ** | `http://rabbitmq:15672` | Message broker | Management UI |
|
||||
| **Grafana** | `https://grafana.mathmast.com` | Monitoring | Dashboards |
|
||||
|
||||
### **🔧 Production Architecture**
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ PRODUCTION INFRASTRUCTURE │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ Azure Load Balancer (4.155.160.32) │
|
||||
│ ┌─────────────────┐ ┌─────────────────┐ ┌──────────────┐ │
|
||||
│ │ Ingress-NGINX │ │ cert-manager │ │ ArgoCD │ │
|
||||
│ │ Controller │ │ (Let's Encrypt)│ │ (GitOps) │ │
|
||||
│ └─────────────────┘ └─────────────────┘ └──────────────┘ │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ ┌─────────────────┐ ┌─────────────────┐ ┌──────────────┐ │
|
||||
│ │ RabbitMQ │ │ Jenkins │ │ Gitea │ │
|
||||
│ │ (Message Q) │ │ (CI/CD) │ │ (Git Repo) │ │
|
||||
│ └─────────────────┘ └─────────────────┘ └──────────────┘ │
|
||||
├─────────────────────────────────────────────────────────────┤
|
||||
│ ┌─────────────────┐ ┌─────────────────┐ ┌──────────────┐ │
|
||||
│ │ freeleaps- │ │ freeleaps- │ │ freeleaps- │ │
|
||||
│ │ devops- │ │ apps │ │ monitoring │ │
|
||||
│ │ reconciler │ │ (Your Apps) │ │ (Metrics) │ │
|
||||
│ └─────────────────┘ └─────────────────┘ └──────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### **📊 Production Namespaces**
|
||||
|
||||
```bash
|
||||
# Your actual namespaces
|
||||
freeleaps-alpha # Alpha environment
|
||||
freeleaps-prod # Production environment
|
||||
freeleaps-devops-system # DevOps tools
|
||||
freeleaps-controls-system # Control plane
|
||||
freeleaps-monitoring-system # Monitoring stack
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📚 **Core Learning Materials**
|
||||
|
||||
### **🎓 Phase 1: Kubernetes Fundamentals**
|
||||
- **[Kubernetes Core Concepts Guide](Kubernetes_Core_Concepts_Guide.md)** - *Start here!*
|
||||
- **Production Connection**: Your actual pods, services, and deployments
|
||||
- **Real Examples**: Based on your `freeleaps-devops-reconciler` deployment
|
||||
- **Hands-on**: Practice with your actual cluster
|
||||
|
||||
- **[PVC Deep Dive Guide](PVC_Deep_Dive_Guide.md)** - *Storage fundamentals*
|
||||
- **Production Connection**: Your Azure disk storage classes
|
||||
- **Real Examples**: How your apps use persistent storage
|
||||
- **Troubleshooting**: Common storage issues in your environment
|
||||
|
||||
### **🔧 Phase 2: DevOps Infrastructure**
|
||||
- **[Custom Resources & Operators Guide](Custom_Resources_And_Operators_Guide.md)** - *Advanced concepts*
|
||||
- **Production Connection**: Your `freeleaps-devops-reconciler` operator
|
||||
- **Real Examples**: How your CRDs work in production
|
||||
- **Architecture**: Understanding your operator pattern
|
||||
|
||||
- **[Reconciler Architecture Deep Dive](Reconciler_Architecture_Deep_Dive.md)** - *Your core system*
|
||||
- **Production Connection**: Your actual reconciler deployment
|
||||
- **Real Examples**: How your DevOps automation works
|
||||
- **Troubleshooting**: Common reconciler issues
|
||||
|
||||
- **[Reconciler Framework Analysis](Reconciler_Framework_Analysis.md)** - *Technical deep dive*
|
||||
- **Production Connection**: Your Python/Kopf operator framework
|
||||
- **Real Examples**: Code analysis from your actual implementation
|
||||
- **Best Practices**: How to improve your reconciler
|
||||
|
||||
### **🌐 Phase 3: Networking & Ingress**
|
||||
- **[Ingress Setup & Redirects Guide](Ingress_Setup_And_Redirects_Guide.md)** - *Web traffic management*
|
||||
- **Production Connection**: Your actual ingress controllers
|
||||
- **Real Examples**: How your domains are configured
|
||||
- **Troubleshooting**: Common ingress issues
|
||||
|
||||
- **[Current Ingress Analysis](Current_Ingress_Analysis.md)** - *Your actual setup*
|
||||
- **Production Connection**: Your real ingress configurations
|
||||
- **Real Examples**: Your actual domain routing
|
||||
- **Monitoring**: How to check ingress health
|
||||
|
||||
### **📨 Phase 4: Messaging & Communication**
|
||||
- **[RabbitMQ Management Analysis](RabbitMQ_Management_Analysis.md)** - *Message broker*
|
||||
- **Production Connection**: Your actual RabbitMQ deployment
|
||||
- **Real Examples**: Your message queues and exchanges
|
||||
- **Management UI**: How to use the built-in management interface
|
||||
|
||||
### **🗄️ Phase 4.5: Database Management**
|
||||
- **[PostgreSQL & Gitea Integration Guide](PostgreSQL_Gitea_Integration_Guide.md)** - *Database operations*
|
||||
- **Production Connection**: Your actual PostgreSQL deployments (Alpha vs Production)
|
||||
- **Real Examples**: How Gitea connects to PostgreSQL in your environments
|
||||
- **Data Access**: How to access and manage your Gitea database
|
||||
- **Monitoring**: Database health checks and performance monitoring
|
||||
|
||||
### **🚀 Phase 5: Operations & Deployment**
|
||||
- **[Kubernetes Bootstrap Guide](Kubernetes_Bootstrap_Guide.md)** - *Cluster setup*
|
||||
- **Production Connection**: How your cluster was built
|
||||
- **Real Examples**: Your actual bootstrap process
|
||||
- **Maintenance**: How to maintain your cluster
|
||||
|
||||
- **[Azure K8s Node Addition Runbook](Azure_K8s_Node_Addition_Runbook.md)** - *Scaling*
|
||||
- **Production Connection**: How to add nodes to your cluster
|
||||
- **Real Examples**: Your actual node addition process
|
||||
- **Automation**: Scripts for node management
|
||||
|
||||
---
|
||||
|
||||
## 🔧 **Practical Exercises**
|
||||
|
||||
### **🎯 Exercise 1: Explore Your Production Cluster**
|
||||
```bash
|
||||
# 1. Connect to your cluster
|
||||
kubectl config use-context your-production-cluster
|
||||
|
||||
# 2. Explore your namespaces
|
||||
kubectl get namespaces | grep freeleaps
|
||||
|
||||
# 3. Check your actual deployments
|
||||
kubectl get deployments -A | grep freeleaps
|
||||
|
||||
# 4. Monitor your reconciler
|
||||
kubectl logs -f deployment/freeleaps-devops-reconciler -n freeleaps-devops-system
|
||||
```
|
||||
|
||||
### **🎯 Exercise 2: RabbitMQ Management UI**
|
||||
```bash
|
||||
# 1. Port forward to RabbitMQ management UI
|
||||
kubectl port-forward svc/rabbitmq-headless -n freeleaps-alpha 15672:15672
|
||||
|
||||
# 2. Access the UI: http://localhost:15672
|
||||
# Username: user
|
||||
# Password: NjlhHFvnDuC7K0ir
|
||||
|
||||
# 3. Explore your queues:
|
||||
# - freeleaps.devops.reconciler.queue
|
||||
# - freeleaps.devops.reconciler.input
|
||||
```
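
If you prefer the CLI over the management UI, the same queue information is available through `rabbitmqctl` inside the broker pod (the pod name below is a placeholder — use whatever `kubectl get pods` shows):

```bash
# Find the broker pod, then list queues with message and consumer counts
kubectl get pods -n freeleaps-alpha | grep rabbitmq
kubectl exec -it <rabbitmq-pod-name> -n freeleaps-alpha -- rabbitmqctl list_queues name messages consumers
```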
|
||||
|
||||
### **🎯 Exercise 3: ArgoCD GitOps**
|
||||
```bash
|
||||
# 1. Access ArgoCD: https://argo.mathmast.com
|
||||
|
||||
# 2. Explore your applications:
|
||||
# - freeleaps-devops-reconciler
|
||||
# - freeleaps-apps
|
||||
# - monitoring stack
|
||||
|
||||
# 3. Check deployment status
|
||||
kubectl get applications -n argocd
|
||||
```
|
||||
|
||||
### **🎯 Exercise 4: Monitor Your Infrastructure**
|
||||
```bash
|
||||
# 1. Check cluster health
|
||||
kubectl get nodes -o wide
|
||||
|
||||
# 2. Monitor resource usage
|
||||
kubectl top nodes
|
||||
kubectl top pods -A
|
||||
|
||||
# 3. Check ingress status
|
||||
kubectl get ingress -A
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ⚡ **Essential Commands**
|
||||
|
||||
### **🔍 Production Monitoring**
|
||||
```bash
|
||||
# Your cluster health
|
||||
kubectl get nodes -o wide
|
||||
kubectl get pods -A --field-selector=status.phase!=Running
|
||||
|
||||
# Your services
|
||||
kubectl get svc -A | grep -E "(rabbitmq|argocd|jenkins|gitea)"
|
||||
|
||||
# Your reconciler status
|
||||
kubectl get deployment freeleaps-devops-reconciler -n freeleaps-devops-system
|
||||
kubectl logs -f deployment/freeleaps-devops-reconciler -n freeleaps-devops-system
|
||||
```
|
||||
|
||||
### **🔧 Troubleshooting**
|
||||
```bash
|
||||
# Check reconciler health
|
||||
kubectl describe deployment freeleaps-devops-reconciler -n freeleaps-devops-system
|
||||
|
||||
# Check RabbitMQ status
|
||||
kubectl get pods -n freeleaps-alpha | grep rabbitmq
|
||||
kubectl logs -f deployment/rabbitmq -n freeleaps-alpha
|
||||
|
||||
# Check ingress issues
|
||||
kubectl describe ingress -A
|
||||
kubectl get events -A --sort-by='.lastTimestamp'
|
||||
```
|
||||
|
||||
### **📊 Resource Management**
|
||||
```bash
|
||||
# Monitor resource usage
|
||||
kubectl top nodes
|
||||
kubectl top pods -A
|
||||
|
||||
# Check storage
|
||||
kubectl get pvc -A
|
||||
kubectl get pv
|
||||
|
||||
# Check networking
|
||||
kubectl get svc -A
|
||||
kubectl get endpoints -A
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎓 **Learning Path**
|
||||
|
||||
### **📅 Week 1: Foundations**
|
||||
- **Day 1-2**: [Kubernetes Core Concepts](Kubernetes_Core_Concepts_Guide.md)
|
||||
- **Day 3-4**: [PVC Deep Dive](PVC_Deep_Dive_Guide.md)
|
||||
- **Day 5**: Practice exercises with your actual cluster
|
||||
|
||||
### **📅 Week 2: DevOps Infrastructure**
|
||||
- **Day 1-2**: [Custom Resources & Operators](Custom_Resources_And_Operators_Guide.md)
|
||||
- **Day 3-4**: [Reconciler Architecture](Reconciler_Architecture_Deep_Dive.md)
|
||||
- **Day 5**: [Reconciler Framework Analysis](Reconciler_Framework_Analysis.md)
|
||||
|
||||
### **📅 Week 3: Networking & Communication**
|
||||
- **Day 1-2**: [Ingress Setup & Redirects](Ingress_Setup_And_Redirects_Guide.md)
|
||||
- **Day 3**: [Current Ingress Analysis](Current_Ingress_Analysis.md)
|
||||
- **Day 4-5**: [RabbitMQ Management](RabbitMQ_Management_Analysis.md)
|
||||
|
||||
### **📅 Week 4: Operations & Production**
|
||||
- **Day 1-2**: [Kubernetes Bootstrap](Kubernetes_Bootstrap_Guide.md)
|
||||
- **Day 3-4**: [Azure Node Addition](Azure_K8s_Node_Addition_Runbook.md)
|
||||
- **Day 5**: Production troubleshooting and monitoring
|
||||
|
||||
---
|
||||
|
||||
## 🔍 **Production Troubleshooting**
|
||||
|
||||
### **🚨 Common Issues & Solutions**
|
||||
|
||||
#### **1. Reconciler Not Working**
|
||||
```bash
|
||||
# Check reconciler status
|
||||
kubectl get deployment freeleaps-devops-reconciler -n freeleaps-devops-system
|
||||
kubectl logs -f deployment/freeleaps-devops-reconciler -n freeleaps-devops-system
|
||||
|
||||
# Check RabbitMQ connection
|
||||
kubectl exec -it deployment/rabbitmq -n freeleaps-alpha -- rabbitmqctl list_connections
|
||||
```
|
||||
|
||||
#### **2. Ingress Issues**
|
||||
```bash
|
||||
# Check ingress controller
|
||||
kubectl get pods -n ingress-nginx
|
||||
kubectl logs -f deployment/ingress-nginx-controller -n ingress-nginx
|
||||
|
||||
# Check certificates
|
||||
kubectl get certificates -A
|
||||
kubectl describe certificate -n your-namespace
|
||||
```
|
||||
|
||||
#### **3. Storage Problems**
|
||||
```bash
|
||||
# Check PVC status
|
||||
kubectl get pvc -A
|
||||
kubectl describe pvc your-pvc-name -n your-namespace
|
||||
|
||||
# Check storage classes
|
||||
kubectl get storageclass
|
||||
```
|
||||
|
||||
### **📊 Monitoring & Alerts**
|
||||
|
||||
#### **Key Metrics to Watch**
|
||||
- **Cluster health**: Node status, pod restarts
|
||||
- **Resource usage**: CPU, memory, disk
|
||||
- **Network**: Ingress traffic, service connectivity
|
||||
- **Applications**: Reconciler health, RabbitMQ queues
|
||||
|
||||
#### **Alerting Setup**
|
||||
```bash
# Check Prometheus scrape targets via ServiceMonitors (assumes the Prometheus
# Operator CRDs are installed; `targets` is not a built-in kubectl resource)
kubectl get servicemonitors -n freeleaps-monitoring-system

# Check Grafana dashboards
# Access: https://grafana.mathmast.com
```
|
||||
|
||||
---
|
||||
|
||||
## 📖 **Additional Resources**
|
||||
|
||||
### **🔗 Official Documentation**
|
||||
- **[Kubernetes Documentation](https://kubernetes.io/docs/)** - Official K8s docs
|
||||
- **[ArgoCD Documentation](https://argo-cd.readthedocs.io/)** - GitOps platform
|
||||
- **[RabbitMQ Documentation](https://www.rabbitmq.com/documentation.html)** - Message broker
|
||||
- **[Helm Documentation](https://helm.sh/docs/)** - Package manager
|
||||
|
||||
### **🎥 Video Resources**
|
||||
- **Kubernetes Crash Course**: [TechWorld with Nana](https://www.youtube.com/watch?v=s_o8dwzRlu4)
|
||||
- **ArgoCD Tutorial**: [ArgoCD Official](https://www.youtube.com/watch?v=MeU5_k9ssOY)
|
||||
- **RabbitMQ Basics**: [RabbitMQ Official](https://www.youtube.com/watch?v=deG25y_r6OI)
|
||||
|
||||
### **📚 Books**
|
||||
- **"Kubernetes in Action"** by Marko Lukša
|
||||
- **"GitOps and Kubernetes"** by Billy Yuen
|
||||
- **"RabbitMQ in Depth"** by Gavin M. Roy
|
||||
|
||||
### **🛠️ Tools & Utilities**
|
||||
- **[k9s](https://k9scli.io/)** - Terminal UI for K8s
|
||||
- **[Lens](https://k8slens.dev/)** - Desktop IDE for K8s
|
||||
- **[kubectx](https://github.com/ahmetb/kubectx)** - Context switching
|
||||
|
||||
---
|
||||
|
||||
## 🎯 **Next Steps**
|
||||
|
||||
### **🚀 Immediate Actions**
|
||||
1. **Set up your development environment** with kubectl and helm
|
||||
2. **Access your production cluster** and explore the resources
|
||||
3. **Complete the practical exercises** in this guide
|
||||
4. **Join the monitoring dashboards** and understand the metrics
|
||||
|
||||
### **📈 Career Development**
|
||||
1. **Get certified**: [CKA (Certified Kubernetes Administrator)](https://www.cncf.io/certification/cka/)
|
||||
2. **Contribute**: Help improve the reconciler and infrastructure
|
||||
3. **Learn**: Stay updated with latest K8s and DevOps practices
|
||||
4. **Share**: Document your learnings and share with the team
|
||||
|
||||
### **🤝 Team Collaboration**
|
||||
- **Code reviews**: Review reconciler changes
|
||||
- **Documentation**: Improve this guide based on your experience
|
||||
- **Mentoring**: Help other junior engineers
|
||||
- **Innovation**: Suggest improvements to the infrastructure
|
||||
|
||||
---
|
||||
|
||||
## 📞 **Support & Contact**
|
||||
|
||||
### **🆘 Getting Help**
|
||||
- **Team Slack**: #devops-support channel
|
||||
- **Documentation**: This guide and linked resources
|
||||
- **Code Reviews**: GitHub pull requests
|
||||
- **Pair Programming**: Schedule sessions with senior engineers
|
||||
|
||||
### **📝 Feedback**
|
||||
- **Documentation**: Create issues for improvements
|
||||
- **Process**: Suggest workflow optimizations
|
||||
- **Tools**: Recommend new tools or improvements
|
||||
|
||||
---
|
||||
|
||||
**🎉 Welcome to the FreeLeaps DevOps team! You're now part of a production infrastructure that serves real users. Take ownership, learn continuously, and help us build amazing things!**
|
||||
|
||||
---
|
||||
|
||||
*Last updated: $(date)*
|
||||
*Maintained by: FreeLeaps DevOps Team*
|
||||
|
||||
1015
docs/RabbitMQ_Management_Analysis.md
Normal file
File diff suppressed because it is too large
440
docs/Reconciler_Architecture_Deep_Dive.md
Normal file
@ -0,0 +1,440 @@
|
||||
# Reconciler Architecture Deep Dive
|
||||
|
||||
## 🎯 **Overview**
|
||||
|
||||
Your `freeleaps-devops-reconciler` is a **sophisticated Kubernetes Operator** that orchestrates your entire DevOps infrastructure. It's not just a simple CRD controller - it's a **full-stack DevOps automation platform** that bridges your Git repositories, container registries, Jenkins pipelines, ArgoCD applications, and Kubernetes deployments.
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ **Architecture Overview**
|
||||
|
||||
### **🔄 The Big Picture: How Your Reconciler Works**
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ FRELEAPS DEVOPS RECONCILER ARCHITECTURE │
|
||||
│ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ KUBERNETES OPERATOR (KOPF) │ │
|
||||
│ │ │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ │ DevOpsProject │ │ ArgoSetting │ │ JenkinsSetting│ │ IngressResource│ │
|
||||
│ │ │ Controller │ │ Controller │ │ Controller │ │ Controller │ │
|
||||
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ EXTERNAL SERVICE INTEGRATION │ │
|
||||
│ │ │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ │ ArgoCD │ │ Jenkins │ │ Docker Hub │ │ GoDaddy │ │
|
||||
│ │ │ Client │ │ Client │ │ Client │ │ Client │ │
|
||||
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ MESSAGING & EVENT SYSTEM │ │
|
||||
│ │ │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
│ │ │ RabbitMQ │ │ Heartbeat │ │ Deployment │ │ TTL Monitor │ │
|
||||
│ │ │ Listener │ │ Sender │ │ Monitor │ │ Manager │ │
|
||||
│ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔧 **Core Components Deep Dive**
|
||||
|
||||
### **1. DevOpsProject Controller** 🏗️
|
||||
|
||||
**What it does:** The **orchestrator-in-chief** that creates your entire DevOps ecosystem.
|
||||
|
||||
**Real Code Example:**
|
||||
```python
|
||||
@kopf.on.create(group=consts.GROUP, version=consts.VERSION, kind=consts.DEVOPS_PROJECT_KIND)
|
||||
def on_devops_proj_created(name: str, namespace: Optional[str], body: Body, logger: Logger, **kwargs):
|
||||
# When you create a DevOpsProject CR, this triggers:
|
||||
# 1. Validates your Git repo and container registry config
|
||||
# 2. Creates ArgoSetting CR (for ArgoCD management)
|
||||
# 3. Creates JenkinsSetting CR (for CI/CD pipelines)
|
||||
# 4. Creates ContainerRegistry CR (for image management)
|
||||
# 5. Creates GitCredentials CR (for authentication)
|
||||
```
|
||||
|
||||
**Your Actual Flow:**
|
||||
```
|
||||
User creates DevOpsProject CR
|
||||
↓
|
||||
Reconciler validates Git repo + container registry
|
||||
↓
|
||||
Creates ArgoSetting CR (manages ArgoCD projects/apps)
|
||||
↓
|
||||
Creates JenkinsSetting CR (manages CI/CD pipelines)
|
||||
↓
|
||||
Creates ContainerRegistry CR (manages Docker images)
|
||||
↓
|
||||
Creates GitCredentials CR (manages authentication)
|
||||
↓
|
||||
Your DevOps ecosystem is ready! 🎉
|
||||
```
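
You can watch this chain happen on the cluster. The exact plural names and API group of these custom resources live in the reconciler's `consts` module and aren't shown here, so discover them first rather than guessing:

```bash
# Discover the custom resources the reconciler registers
kubectl api-resources | grep -iE "devops|argo|jenkins"

# Then inspect the objects it created (use the plural printed above)
kubectl get <plural-from-above> -n freeleaps-devops-system
kubectl describe <plural-from-above>/<name> -n freeleaps-devops-system
```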
|
||||
|
||||
### **2. ArgoSetting Controller** 🚀
|
||||
|
||||
**What it does:** Manages your **ArgoCD infrastructure** - projects, repositories, and applications.
|
||||
|
||||
**Real Code Example:**
|
||||
```python
|
||||
# When ArgoSetting CR is created:
|
||||
for project in as_spec.projects:
|
||||
# Creates ArgoCD Project
|
||||
desired_resources.append(ManagedResource(
|
||||
resource_type="project",
|
||||
resource_id=project.name,
|
||||
description=project.desc,
|
||||
metadata={
|
||||
"source_repos": [repo.url for repo in as_spec.repositories],
|
||||
"destinations": [{"server": dest.server, "namespace": dest.namespace}]
|
||||
}
|
||||
))
|
||||
|
||||
for app in as_spec.applications:
|
||||
# Creates ArgoCD Application
|
||||
desired_resources.append(ManagedResource(
|
||||
resource_type="application",
|
||||
resource_id=app.name,
|
||||
metadata={
|
||||
"project": app.project,
|
||||
"repo_url": app.source.repo_url,
|
||||
"path": app.source.path,
|
||||
"target_revision": app.source.revision
|
||||
}
|
||||
))
|
||||
```
|
||||
|
||||
**Your Actual ArgoCD Management:**
|
||||
```
|
||||
ArgoSetting CR created
|
||||
↓
|
||||
Reconciler connects to ArgoCD API (argo.mathmast.com)
|
||||
↓
|
||||
Creates ArgoCD Project (defines permissions, repos, destinations)
|
||||
↓
|
||||
Creates ArgoCD Repository (connects to your Git repo)
|
||||
↓
|
||||
Creates ArgoCD Application (deploys your app)
|
||||
↓
|
||||
ArgoCD starts syncing your application! 🔄
|
||||
```
|
||||
|
||||
### **3. JenkinsSetting Controller** ⚙️
|
||||
|
||||
**What it does:** Manages your **Jenkins CI/CD pipelines** - creates folders, pipelines, and credentials.
|
||||
|
||||
**Real Code Example:**
|
||||
```python
|
||||
@kopf.timer(group=consts.GROUP, version=consts.VERSION, kind=consts.JENKINS_SETTINGS_KIND, interval=300)
|
||||
def poll_project_config(name: str, namespace: str, body: Body, logger: logging.Logger, **kwargs):
|
||||
# Every 5 minutes, the reconciler:
|
||||
# 1. Fetches your project's YAML config from Git
|
||||
# 2. Generates Jenkins Pipeline DSL
|
||||
# 3. Creates/updates Jenkins pipelines
|
||||
# 4. Manages pipeline credentials
|
||||
```
|
||||
|
||||
**Your Actual Jenkins Management:**
|
||||
```
|
||||
JenkinsSetting CR created
|
||||
↓
|
||||
Reconciler clones your Git repo
|
||||
↓
|
||||
Reads your project's YAML configuration
|
||||
↓
|
||||
Generates Jenkins Pipeline DSL (Groovy script)
|
||||
↓
|
||||
Creates Jenkins folder structure (project/environment)
|
||||
↓
|
||||
Creates Jenkins pipeline with your DSL
|
||||
↓
|
||||
Your CI/CD pipeline is ready! 🚀
|
||||
```
|
||||
|
||||
### **4. DeploymentRecord Controller** 🎯
|
||||
|
||||
**What it does:** Orchestrates **actual deployments** - triggers Jenkins builds, monitors ArgoCD sync, manages TTL.
|
||||
|
||||
**Real Code Example:**
|
||||
```python
|
||||
@kopf.on.create(group=consts.GROUP, version=consts.VERSION, plural=consts.DEPLOYMENT_RECORD_PLURAL)
|
||||
async def create_deployment_record(spec: Dict[str, Any], name: str, namespace: str, uid: str, logger: Logger, **kwargs):
|
||||
# When you trigger a deployment:
|
||||
# 1. Validates deployment request
|
||||
# 2. Triggers Jenkins build
|
||||
# 3. Monitors build progress
|
||||
# 4. Triggers ArgoCD sync
|
||||
# 5. Monitors deployment status
|
||||
# 6. Manages TTL (Time To Live)
|
||||
```
|
||||
|
||||
**Your Actual Deployment Flow:**
|
||||
```
|
||||
User clicks "Deploy" button
|
||||
↓
|
||||
DeploymentRecord CR created
|
||||
↓
|
||||
Reconciler triggers Jenkins build
|
||||
↓
|
||||
Monitors build phases (building, testing, packaging)
|
||||
↓
|
||||
Triggers ArgoCD sync when build completes
|
||||
↓
|
||||
Monitors ArgoCD sync status
|
||||
↓
|
||||
Creates IngressResource for external access
|
||||
↓
|
||||
Deployment is live! 🌐
|
||||
```
|
||||
|
||||
### **5. IngressResource Controller** 🌐
|
||||
|
||||
**What it does:** Manages **external access** - DNS records, SSL certificates, and ingress rules.
|
||||
|
||||
**Real Code Example:**
|
||||
```python
|
||||
async def create_ingress_resource(self, body: Body, name: str, namespace: str, **kwargs):
|
||||
# When IngressResource CR is created:
|
||||
# 1. Creates DNS record via GoDaddy API
|
||||
# 2. Requests SSL certificate via cert-manager
|
||||
# 3. Creates Kubernetes Ingress
|
||||
# 4. Updates deployment URL
|
||||
# 5. Sends heartbeat with live URL
|
||||
```
|
||||
|
||||
**Your Actual Ingress Management:**
|
||||
```
|
||||
IngressResource CR created
|
||||
↓
|
||||
Reconciler calls GoDaddy API
|
||||
↓
|
||||
Creates DNS record (app.mathmast.com → 4.155.160.32)
|
||||
↓
|
||||
Requests SSL certificate from Let's Encrypt
|
||||
↓
|
||||
Creates Kubernetes Ingress rule
|
||||
↓
|
||||
Your app is accessible at https://app.mathmast.com! 🔒
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📡 **Messaging & Event System**
|
||||
|
||||
### **RabbitMQ Integration** 🐰
|
||||
|
||||
Your reconciler uses **RabbitMQ** for asynchronous communication and event-driven architecture.
|
||||
|
||||
**Event Types:**
|
||||
```python
|
||||
class EventType(Enum):
|
||||
DEVOPS_INITIALIZE = "DevOpsInitialize" # New project setup
|
||||
DEVOPS_RECONCILE = "DevOpsReconcile" # Deployment trigger
|
||||
DEVOPS_RECONCILE_HEARTBEAT = "DevOpsReconcileJobHeartbeat" # Progress updates
|
||||
```
|
||||
|
||||
**Real Event Flow:**
|
||||
```
|
||||
User triggers deployment
|
||||
↓
|
||||
DevOpsReconcileEvent sent to RabbitMQ
|
||||
↓
|
||||
Reconciler picks up event
|
||||
↓
|
||||
Creates DeploymentRecord CR
|
||||
↓
|
||||
Sends heartbeat every 30 seconds
|
||||
↓
|
||||
User sees real-time progress! 📊
|
||||
```
|
||||
|
||||
### **Heartbeat System** 💓
|
||||
|
||||
**What it does:** Provides **real-time deployment status** to your users.
|
||||
|
||||
**Real Code Example:**
|
||||
```python
|
||||
@dataclass
|
||||
class DevOpsReconcileJobHeartbeatEvent:
|
||||
operation: str = "heartbeat"
|
||||
id: str = "" # deployment ID
|
||||
status: str = "running" # running/success/failed/terminated
|
||||
phase: str = "initializing" # current deployment phase
|
||||
phase_message: str = "" # human-readable message
|
||||
url: Optional[str] = None # live URL when deployment completes
|
||||
```
|
||||
|
||||
**Your Actual Heartbeat Flow:**
|
||||
```
|
||||
Deployment starts
|
||||
↓
|
||||
Heartbeat every 30 seconds:
|
||||
- Phase: "initializing" → "building" → "deploying" → "verifying"
|
||||
- Status: "running" → "success"
|
||||
- URL: None → "https://app.mathmast.com"
|
||||
↓
|
||||
User sees live progress in UI! 📈
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🕒 **TTL (Time To Live) Management**
|
||||
|
||||
### **Automatic Cleanup** 🧹
|
||||
|
||||
Your reconciler includes **sophisticated TTL management** for temporary deployments.
|
||||
|
||||
**Real Code Example:**
|
||||
```python
|
||||
@dataclass
|
||||
class TTLMonitoringState:
|
||||
deployment_record_name: str
|
||||
enabled: bool
|
||||
ttl_seconds: int # Default: 3 hours (10800 seconds)
|
||||
start_time: datetime
|
||||
expiration_time: datetime
|
||||
phase: TTLMonitoringPhase # monitoring/cleanup/completed
|
||||
```
|
||||
|
||||
**Your Actual TTL Flow:**
|
||||
```
|
||||
Deployment completes
|
||||
↓
|
||||
TTL monitoring starts (3 hours by default)
|
||||
↓
|
||||
Every minute: Check if TTL expired
|
||||
↓
|
||||
When expired: Trigger cleanup
|
||||
↓
|
||||
Delete ArgoCD applications
|
||||
↓
|
||||
Delete Kubernetes resources
|
||||
↓
|
||||
Delete DNS records
|
||||
↓
|
||||
Delete SSL certificates
|
||||
↓
|
||||
Environment cleaned up! 🧹
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔗 **Relationship with Your DevOps Infrastructure**
|
||||
|
||||
### **How the Reconciler Extends Kubernetes** 🔧
|
||||
|
||||
Your reconciler doesn't just manage Kubernetes resources - it **extends Kubernetes** with custom DevOps capabilities:
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ KUBERNETES API EXTENSION │
|
||||
│ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ CUSTOM RESOURCES (CRs) │ │
|
||||
│ │ │ │
|
||||
│ │ DevOpsProject CR → ArgoSetting CR → JenkinsSetting CR │ │
|
||||
│ │ ↓ ↓ ↓ │ │
|
||||
│ │ ContainerRegistry CR → GitCredentials CR → IngressResource CR │ │
|
||||
│ │ ↓ ↓ ↓ │ │
|
||||
│ │ DeploymentRecord CR → TTL Management → Heartbeat System │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ EXTERNAL SERVICE ORCHESTRATION │ │
|
||||
│ │ │ │
|
||||
│ │ ArgoCD API → Jenkins API → Docker Hub API → GoDaddy API │ │
|
||||
│ │ ↓ ↓ ↓ ↓ │ │
|
||||
│ │ Git Repos → CI/CD Pipelines → Container Images → DNS Records │ │
|
||||
│ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### **Your Actual DevOps Workflow** 🔄
|
||||
|
||||
**Complete Flow Example:**
|
||||
```
|
||||
1. Developer pushes code to Git
|
||||
↓
|
||||
2. User creates DevOpsProject CR
|
||||
↓
|
||||
3. Reconciler creates ArgoSetting CR
|
||||
↓
|
||||
4. Reconciler creates JenkinsSetting CR
|
||||
↓
|
||||
5. Jenkins pipeline created and triggered
|
||||
↓
|
||||
6. Build completes, image pushed to registry
|
||||
↓
|
||||
7. ArgoCD syncs new image
|
||||
↓
|
||||
8. IngressResource creates external access
|
||||
↓
|
||||
9. App is live at https://app.mathmast.com
|
||||
↓
|
||||
10. TTL monitoring starts (3 hours)
|
||||
↓
|
||||
11. After 3 hours: Automatic cleanup
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎯 **Key Benefits of Your Reconciler Architecture**
|
||||
|
||||
### **1. Declarative DevOps** 📝
|
||||
- **Define once, deploy everywhere**: Your DevOpsProject CR defines your entire infrastructure
|
||||
- **GitOps workflow**: Everything is version-controlled and declarative
|
||||
- **Consistency**: Same process for alpha and production environments
|
||||
|
||||
### **2. Automation at Scale** 🤖
|
||||
- **Zero manual intervention**: From Git push to live deployment
|
||||
- **Multi-environment support**: Alpha and production with same configuration
|
||||
- **Automatic cleanup**: TTL management prevents resource waste
|
||||
|
||||
### **3. Real-time Visibility** 👁️
|
||||
- **Live progress tracking**: Heartbeat system shows real-time deployment status
|
||||
- **Comprehensive monitoring**: Every phase is tracked and reported
|
||||
- **Error handling**: Detailed error messages and recovery mechanisms
|
||||
|
||||
### **4. Enterprise Integration** 🏢
|
||||
- **Multi-service orchestration**: ArgoCD, Jenkins, Docker Hub, GoDaddy
|
||||
- **Security**: Credential management and SSL certificate automation
|
||||
- **Scalability**: Kubernetes-native architecture scales with your cluster
|
||||
|
||||
---
|
||||
|
||||
## 🔍 **Your Reconciler vs. Traditional DevOps**
|
||||
|
||||
### **Traditional DevOps** 🏗️
|
||||
```
|
||||
Manual Jenkins setup → Manual ArgoCD config → Manual DNS setup → Manual SSL setup
|
||||
```
|
||||
|
||||
### **Your Reconciler** 🚀
|
||||
```
|
||||
DevOpsProject CR → Automatic Jenkins + ArgoCD + DNS + SSL setup
|
||||
```
|
||||
|
||||
**The difference:** Your reconciler transforms **manual DevOps tasks** into **declarative, automated, and scalable** operations that run on Kubernetes.
|
||||
|
||||
---
|
||||
|
||||
## 🎉 **Conclusion**
|
||||
|
||||
Your `freeleaps-devops-reconciler` is not just a Kubernetes operator - it's a **complete DevOps automation platform** that:
|
||||
|
||||
1. **Extends Kubernetes** with custom DevOps capabilities
|
||||
2. **Orchestrates multiple external services** (ArgoCD, Jenkins, Docker Hub, GoDaddy)
|
||||
3. **Provides real-time visibility** into deployment progress
|
||||
4. **Automates complex workflows** from Git push to live deployment
|
||||
5. **Manages the complete lifecycle** including cleanup and TTL
|
||||
|
||||
It's the **brain** of your DevOps infrastructure, making complex multi-service orchestration as simple as creating a Kubernetes Custom Resource! 🧠✨
|
||||
521
docs/Reconciler_Framework_Analysis.md
Normal file
@ -0,0 +1,521 @@

# Reconciler Framework Analysis & Robustness Assessment

## 🎯 **Framework Overview**

Your `freeleaps-devops-reconciler` is built on **Kopf** (Kubernetes Operator Pythonic Framework), not FastAPI. Here's the detailed breakdown:

### **🏗️ Framework Stack**

```
┌──────────────────────────────────────────────────────────────┐
│           FREELEAPS RECONCILER FRAMEWORK STACK               │
│                                                              │
│  ┌────────────────────────────────────────────────────────┐ │
│  │        KOPF (Kubernetes Operator Framework)            │ │
│  │  • Event-driven Kubernetes resource watching           │ │
│  │  • Custom Resource Definition (CRD) management         │ │
│  │  • Reconciliation loop with retry mechanisms           │ │
│  │  • Kubernetes API integration                          │ │
│  └────────────────────────────────────────────────────────┘ │
│                             │                                │
│  ┌────────────────────────────────────────────────────────┐ │
│  │             ASYNCIO + THREADING HYBRID                 │ │
│  │  • Asynchronous operations for I/O-bound tasks         │ │
│  │  • Threading for CPU-bound operations                  │ │
│  │  • Event loop management for concurrent operations     │ │
│  └────────────────────────────────────────────────────────┘ │
│                             │                                │
│  ┌────────────────────────────────────────────────────────┐ │
│  │             RABBITMQ MESSAGING LAYER                   │ │
│  │  • Asynchronous message processing                     │ │
│  │  • Event-driven architecture                           │ │
│  │  • Heartbeat system for real-time updates              │ │
│  └────────────────────────────────────────────────────────┘ │
│                             │                                │
│  ┌────────────────────────────────────────────────────────┐ │
│  │           EXTERNAL SERVICE INTEGRATION                 │ │
│  │  • ArgoCD API client (synchronous)                     │ │
│  │  • Jenkins API client (synchronous)                    │ │
│  │  • Docker Hub API client (synchronous)                 │ │
│  │  • GoDaddy DNS API client (asynchronous)               │ │
│  └────────────────────────────────────────────────────────┘ │
└──────────────────────────────────────────────────────────────┘
```

---

## 🔧 **Framework Architecture Deep Dive**

### **1. Kopf Framework** 🎯

**What it is:** A Python framework for building Kubernetes operators using decorators and event handlers.

**Your Implementation:**

```python
# Main operator setup
kopf.configure(
    verbose=config.RECONCILER_DEBUG,
)

# Event handlers using decorators
@kopf.on.create(group=consts.GROUP, version=consts.VERSION, kind=consts.DEVOPS_PROJECT_KIND)
def on_devops_proj_created(name: str, namespace: Optional[str], body: Body, logger: Logger, **kwargs):
    # Your reconciliation logic here

@kopf.timer(group=consts.GROUP, version=consts.VERSION, kind=consts.JENKINS_SETTINGS_KIND, interval=300)
def poll_project_config(name: str, namespace: str, body: Body, logger: logging.Logger, **kwargs):
    # Periodic reconciliation every 5 minutes
```

**Key Features:**

- **Event-driven**: Watches the Kubernetes API for resource changes
- **Retry mechanisms**: `kopf.TemporaryError` for transient failures
- **Resource management**: Automatic cleanup and state management
- **Logging integration**: Built-in logging with Kubernetes events

### **2. Asyncio + Threading Hybrid** 🔄

**Your Architecture Pattern:**

```python
# Main event loop (asyncio)
loop = asyncio.get_event_loop()
loop.run_until_complete(
    kopf.operator(
        clusterwide=False,
        priority=int(time.time() * 1000000),
        peering_name="freeleaps-devops-reconciler",
        namespaces=["freeleaps-devops-system"],
    )
)

# Threading for TTL recovery
def delayed_ttl_recovery():
    import threading
    # ... recovery logic elided in this excerpt ...

ttl_thread = threading.Thread(target=delayed_ttl_recovery, daemon=True)
ttl_thread.start()
```

**Why This Pattern:**

- **Asyncio**: For I/O-bound operations (API calls, network requests)
- **Threading**: For CPU-bound operations and blocking calls
- **Event Loop**: Manages concurrent operations efficiently
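
Where the hybrid exists mainly to keep synchronous client calls from stalling the event loop, the standard-library bridge is often enough. A minimal sketch, assuming a blocking client call similar to the ArgoCD client referenced above:

```python
# Sketch only: offload a blocking client call to the default thread pool
# so the asyncio event loop stays responsive.
import asyncio

def fetch_argo_version_blocking() -> str:
    # Placeholder for a synchronous call such as get_argo_client().get_version()
    return "v2.x"

async def reconcile_once() -> None:
    version = await asyncio.to_thread(fetch_argo_version_blocking)
    print(f"ArgoCD reports {version}")

asyncio.run(reconcile_once())
```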

### **3. RabbitMQ Integration** 🐰

**Your Messaging Architecture:**

```python
# Event types
class EventType(Enum):
    DEVOPS_INITIALIZE = "DevOpsInitialize"                        # New project setup
    DEVOPS_RECONCILE = "DevOpsReconcile"                          # Deployment trigger
    DEVOPS_RECONCILE_HEARTBEAT = "DevOpsReconcileJobHeartbeat"    # Progress updates

# Async message processing
async def handle_rabbitmq_message(ch, method, properties, body):
    # Process messages asynchronously
```
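
For context, this is one way a synchronous pika delivery callback can hand messages to an async handler like the one above. It is a sketch under assumptions: the queue name, connection details, and handler body are placeholders, not the reconciler's actual wiring.

```python
# Sketch: bridge pika's blocking consumer into an asyncio handler.
import asyncio
import pika

async def handle_rabbitmq_message(ch, method, properties, body):
    print(f"received {body!r}")  # stand-in for the real event processing

def consume(loop: asyncio.AbstractEventLoop, queue: str = "devops-events") -> None:
    # Connection and queue are placeholders; run this in its own thread while
    # the asyncio loop runs elsewhere.
    conn = pika.BlockingConnection(pika.ConnectionParameters(host="localhost"))
    channel = conn.channel()
    channel.queue_declare(queue=queue, durable=True)

    def on_message(ch, method, properties, body):
        # Hand the delivery to the running event loop from pika's thread.
        asyncio.run_coroutine_threadsafe(
            handle_rabbitmq_message(ch, method, properties, body), loop
        )

    channel.basic_consume(queue=queue, on_message_callback=on_message, auto_ack=True)
    channel.start_consuming()
```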

---

## ⚠️ **Current Issues & Reliability Problems**

### **1. Error Handling Inconsistencies** 🚨

**Problem:** Mixed error handling patterns throughout the codebase.

**Evidence:**

```python
# Inconsistent error handling patterns found:

# Pattern 1: Generic Exception catching
except Exception as e:
    logger.error(f"Failed to setup HeartbeatSender: {e}")
    logger.warning("DeploymentRecord controller will continue without heartbeat functionality")

# Pattern 2: Specific error handling
except kopf.TemporaryError:
    raise  # Re-raise kopf.TemporaryError for retry

# Pattern 3: Custom error classes
except SecretNotFoundError as e:
    # Handle specific error
```

**Issues:**

- **Silent failures**: Some exceptions are caught and logged but not properly handled
- **Inconsistent retry logic**: Some errors retry, others don't
- **Resource leaks**: Failed operations may leave resources in an inconsistent state

### **2. Threading and Asyncio Complexity** 🔄

**Problem:** Complex interaction between threading and asyncio can lead to race conditions.

**Evidence:**

```python
# Complex threading setup in operator.py
def delayed_ttl_recovery():
    import threading
    import asyncio

    def run_async_callback():
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

        if loop.is_running():
            asyncio.run_coroutine_threadsafe(run_ttl_recovery(), loop)
        else:
            loop.run_until_complete(run_ttl_recovery())

ttl_thread = threading.Thread(target=delayed_ttl_recovery, daemon=True)
ttl_thread.start()
```

**Issues:**

- **Race conditions**: Multiple threads accessing shared resources
- **Event loop conflicts**: Complex event loop management
- **Resource cleanup**: Daemon threads may not clean up properly

### **3. Configuration Management** ⚙️

**Problem:** Complex configuration with many environment variables and potential for misconfiguration.

**Evidence:**

```python
# 50+ environment variables in config.py
env_mappings = {
    "RECONCILER_DEBUG": (bool, lambda x: x.lower() == "true"),
    "RABBITMQ_HOST": str,
    "RABBITMQ_PORT": int,
    "JENKINS_ENDPOINT": str,
    "ARGOCD_ENDPOINT": str,
    # ... 40+ more variables
}
```

**Issues:**

- **Configuration drift**: Easy to have mismatched configurations
- **Validation gaps**: Limited validation of configuration values
- **Default handling**: Some configurations have defaults, others don't

### **4. External Service Dependencies** 🔗

**Problem:** Heavy dependency on external services that can fail independently.

**Evidence:**

```python
# Multiple external service dependencies
try:
    init_argo_client(host=config.ARGOCD_ENDPOINT, ...)
    remote_argo_ver = get_argo_client().get_version()
except Exception as e:
    logger.error(f"Failed to connect to ArgoCD server: {e}")
    logger.warning("Continuing operator startup without ArgoCD connection")

try:
    message_listener = MessageListener(...)
    if message_listener.start():
        logger.info("RabbitMQ message listener started successfully")
    else:
        logger.warning("Failed to start RabbitMQ message listener")
except Exception as e:
    logger.error(f"Error starting RabbitMQ message listener: {e}")
```

**Issues:**

- **Cascade failures**: One service failure can affect others
- **Partial functionality**: System continues with degraded capabilities
- **Error propagation**: Errors from external services may not be properly handled

### **5. Resource Management** 💾

**Problem:** Complex resource lifecycle management with potential for leaks.

**Evidence:**

```python
# Complex resource cleanup in TTL management
async def cleanup_application_resources(self, applications: List[ArgoApplicationInfo],
                                         skip_resource_types: List[str] = None,
                                         cleanup_timeout: int = 300) -> Dict[str, Any]:
    # Complex cleanup logic with multiple failure points
```

**Issues:**

- **Resource leaks**: Failed cleanup operations may leave resources behind
- **Timeout handling**: Complex timeout management across multiple operations
- **State inconsistency**: Resources may be in inconsistent states after failures

---

## 🚀 **Robustness Improvement Recommendations**

### **1. Standardized Error Handling** 🛡️

**Recommendation:** Implement consistent error handling patterns.

```python
# Proposed error handling pattern
class ReconcilerErrorHandler:
    @staticmethod
    def handle_operation(operation_name: str, operation: Callable, logger: Logger):
        try:
            return operation()
        except kopf.TemporaryError:
            # Re-raise for retry
            raise
        except ExternalServiceError as e:
            # Handle external service failures
            logger.error(f"External service error in {operation_name}: {e}")
            raise kopf.TemporaryError(f"External service unavailable: {e}", delay=30)
        except ValidationError as e:
            # Handle validation errors
            logger.error(f"Validation error in {operation_name}: {e}")
            raise kopf.PermanentError(f"Invalid configuration: {e}")
        except Exception as e:
            # Handle unexpected errors
            logger.error(f"Unexpected error in {operation_name}: {e}")
            raise kopf.TemporaryError(f"Internal error: {e}", delay=60)
```
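
A hypothetical use of the proposed handler inside a Kopf event handler might look like this; `initialize_project` is a placeholder for whatever work the handler actually performs:

```python
# Sketch: wrapping a Kopf create handler with the proposed error handler.
import kopf

@kopf.on.create(group=consts.GROUP, version=consts.VERSION, kind=consts.DEVOPS_PROJECT_KIND)
def on_devops_proj_created(name, namespace, body, logger, **kwargs):
    ReconcilerErrorHandler.handle_operation(
        operation_name=f"initialize {namespace}/{name}",
        operation=lambda: initialize_project(body),  # placeholder for the real work
        logger=logger,
    )
```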

### **2. Simplified Asyncio Architecture** 🔄

**Recommendation:** Reduce threading complexity and use pure asyncio where possible.

```python
# Proposed simplified architecture
class ReconcilerManager:
    def __init__(self):
        self.event_loop = asyncio.get_event_loop()
        self.tasks = []

    async def start(self):
        # Start all async tasks
        self.tasks.extend([
            asyncio.create_task(self.ttl_monitor()),
            asyncio.create_task(self.heartbeat_sender()),
            asyncio.create_task(self.message_listener()),
        ])

    async def stop(self):
        # Clean shutdown of all tasks
        for task in self.tasks:
            task.cancel()
        await asyncio.gather(*self.tasks, return_exceptions=True)
```

### **3. Configuration Validation** ✅

**Recommendation:** Add comprehensive configuration validation.

```python
# Proposed configuration validation
class ConfigurationValidator:
    @staticmethod
    def validate_config(config: Config) -> List[str]:
        errors = []

        # Required fields
        required_fields = [
            "RABBITMQ_HOST", "RABBITMQ_PORT", "JENKINS_ENDPOINT",
            "ARGOCD_ENDPOINT", "DEFAULT_GIT_USERNAME",
        ]

        for field in required_fields:
            if not getattr(config, field, None):
                errors.append(f"Missing required configuration: {field}")

        # URL validation
        if not is_valid_url(config.JENKINS_ENDPOINT):
            errors.append(f"Invalid Jenkins endpoint: {config.JENKINS_ENDPOINT}")

        # Port validation
        if not (1 <= config.RABBITMQ_PORT <= 65535):
            errors.append(f"Invalid RabbitMQ port: {config.RABBITMQ_PORT}")

        return errors
```
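
The validator above assumes an `is_valid_url` helper that is not shown; a minimal standard-library version could be:

```python
# Sketch of the is_valid_url helper assumed by the validator above.
from urllib.parse import urlparse

def is_valid_url(value: str) -> bool:
    """Return True if value looks like an absolute http(s) URL."""
    try:
        parsed = urlparse(value)
    except (TypeError, ValueError):
        return False
    return parsed.scheme in ("http", "https") and bool(parsed.netloc)
```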

### **4. Circuit Breaker Pattern** ⚡

**Recommendation:** Implement circuit breakers for external service calls.

```python
# Proposed circuit breaker implementation
class CircuitBreaker:
    def __init__(self, failure_threshold: int = 5, timeout: int = 60):
        self.failure_threshold = failure_threshold
        self.timeout = timeout
        self.failure_count = 0
        self.last_failure_time = None
        self.state = "CLOSED"  # CLOSED, OPEN, HALF_OPEN

    async def call(self, operation: Callable):
        if self.state == "OPEN":
            if time.time() - self.last_failure_time > self.timeout:
                self.state = "HALF_OPEN"
            else:
                raise ExternalServiceError("Circuit breaker is OPEN")

        try:
            result = await operation()
            if self.state == "HALF_OPEN":
                self.state = "CLOSED"
                self.failure_count = 0
            return result
        except Exception as e:
            self.failure_count += 1
            self.last_failure_time = time.time()

            if self.failure_count >= self.failure_threshold:
                self.state = "OPEN"

            raise e
```
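
A hypothetical way to apply the breaker to the synchronous ArgoCD client referenced earlier, keeping the async `call()` interface by offloading the blocking call to a thread:

```python
# Sketch: guarding the synchronous ArgoCD client with the proposed breaker.
import asyncio

argocd_breaker = CircuitBreaker(failure_threshold=5, timeout=60)

async def fetch_argocd_version() -> str:
    # get_argo_client() is the synchronous client from the codebase excerpts above.
    return await argocd_breaker.call(
        lambda: asyncio.to_thread(get_argo_client().get_version)
    )
```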

### **5. Health Checks and Monitoring** 📊

**Recommendation:** Add comprehensive health checks and monitoring.

```python
# Proposed health check system
class HealthChecker:
    def __init__(self):
        self.checks = {
            "kopf_operator": self.check_kopf_operator,
            "rabbitmq_connection": self.check_rabbitmq_connection,
            "argocd_connection": self.check_argocd_connection,
            "jenkins_connection": self.check_jenkins_connection,
            "kubernetes_api": self.check_kubernetes_api,
        }

    async def run_health_checks(self) -> Dict[str, bool]:
        results = {}
        for name, check in self.checks.items():
            try:
                results[name] = await check()
            except Exception as e:
                results[name] = False
                logger.error(f"Health check failed for {name}: {e}")
        return results

    async def check_kopf_operator(self) -> bool:
        # Check if the Kopf operator is running
        return True

    async def check_rabbitmq_connection(self) -> bool:
        # Check RabbitMQ connectivity
        return True
```
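
A minimal sketch of driving these checks from the operator's event loop; the interval and logger name are arbitrary. Kopf can also expose a liveness endpoint via its `--liveness` option, which such checks could feed.

```python
# Sketch: periodically run the proposed HealthChecker and log degraded components.
import asyncio
import logging

logger = logging.getLogger("health")

async def health_check_loop(checker: "HealthChecker", interval_seconds: int = 60) -> None:
    while True:
        results = await checker.run_health_checks()
        unhealthy = [name for name, ok in results.items() if not ok]
        if unhealthy:
            logger.warning("Degraded components: %s", ", ".join(unhealthy))
        await asyncio.sleep(interval_seconds)
```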

### **6. Resource Lifecycle Management** 🔄

**Recommendation:** Implement proper resource lifecycle management.

```python
# Proposed resource lifecycle manager
class ResourceLifecycleManager:
    def __init__(self):
        self.resources = {}

    async def create_resource(self, resource_type: str, resource_id: str,
                              create_func: Callable, cleanup_func: Callable):
        try:
            result = await create_func()
            self.resources[resource_id] = {
                "type": resource_type,
                "created_at": time.time(),
                "cleanup_func": cleanup_func,
                "status": "active",
            }
            return result
        except Exception as e:
            # Cleanup on creation failure
            await self.cleanup_resource(resource_id)
            raise e

    async def cleanup_resource(self, resource_id: str):
        if resource_id in self.resources:
            resource = self.resources[resource_id]
            try:
                await resource["cleanup_func"]()
                resource["status"] = "cleaned"
            except Exception as e:
                logger.error(f"Failed to cleanup resource {resource_id}: {e}")
                resource["status"] = "cleanup_failed"
```

---

## 🎯 **Feature Enhancement Recommendations**

### **1. Observability Improvements** 👁️

**Current State:** Basic logging with some structured logging.

**Recommendations:**

- **Distributed tracing**: Add OpenTelemetry integration
- **Metrics collection**: Prometheus metrics for all operations (see the sketch below)
- **Structured logging**: Consistent log format across all components
- **Alerting**: Proactive alerts for failures and degraded states
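
As referenced in the metrics bullet above, here is a minimal sketch of Prometheus instrumentation with `prometheus_client`; the metric names and scrape port are illustrative choices, not existing conventions in the codebase.

```python
# Sketch: count and time reconciliations, exposed on a Prometheus scrape port.
import time
from prometheus_client import Counter, Histogram, start_http_server

RECONCILES_TOTAL = Counter(
    "reconciler_reconciles_total", "Reconciliations processed", ["kind", "outcome"]
)
RECONCILE_SECONDS = Histogram(
    "reconciler_reconcile_duration_seconds", "Reconciliation duration", ["kind"]
)

def instrumented_reconcile(kind: str, reconcile_fn) -> None:
    start = time.perf_counter()
    try:
        reconcile_fn()
        RECONCILES_TOTAL.labels(kind=kind, outcome="success").inc()
    except Exception:
        RECONCILES_TOTAL.labels(kind=kind, outcome="error").inc()
        raise
    finally:
        RECONCILE_SECONDS.labels(kind=kind).observe(time.perf_counter() - start)

start_http_server(9464)  # scrape endpoint; the port is an arbitrary choice
```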

### **2. Testing Improvements** 🧪

**Current State:** Limited test coverage with some unit tests.

**Recommendations:**

- **Integration tests**: Test full reconciliation flows
- **Chaos engineering**: Test failure scenarios
- **Performance tests**: Test under load
- **End-to-end tests**: Test complete user workflows

### **3. Security Enhancements** 🔒

**Current State:** Basic authentication and authorization.

**Recommendations:**

- **RBAC improvements**: Fine-grained permissions
- **Secret management**: Better secret rotation and management
- **Audit logging**: Comprehensive audit trails
- **Network policies**: Restrict network access

### **4. Performance Optimizations** ⚡

**Current State:** Basic performance with some optimization.

**Recommendations:**

- **Connection pooling**: Reuse connections to external services
- **Caching**: Cache frequently accessed data
- **Batch operations**: Batch API calls where possible
- **Resource limits**: Proper resource limits and requests

---

## 🎉 **Conclusion**

Your `freeleaps-devops-reconciler` is a **sophisticated DevOps automation platform** built on solid foundations, but it has several areas for improvement:

### **Strengths** ✅

- **Comprehensive functionality**: Handles complex multi-service orchestration
- **Event-driven architecture**: Good use of RabbitMQ for messaging
- **Kubernetes-native**: Proper use of the Kopf framework
- **Real-time visibility**: Heartbeat system provides a good user experience

### **Areas for Improvement** 🔧

- **Error handling**: Standardize error handling patterns
- **Architecture complexity**: Simplify threading/asyncio interactions
- **Configuration management**: Add validation and defaults
- **External dependencies**: Implement circuit breakers and fallbacks
- **Resource management**: Improve lifecycle management
- **Observability**: Add comprehensive monitoring and tracing

### **Priority Recommendations** 🎯

1. **High Priority**: Standardize error handling and add circuit breakers
2. **Medium Priority**: Simplify the architecture and add configuration validation
3. **Low Priority**: Add comprehensive monitoring and testing

The reconciler is **production-ready** but would benefit significantly from these robustness improvements to handle edge cases and failures more gracefully! 🚀
397
docs/add_k8s_node.sh
Executable file
@ -0,0 +1,397 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Azure Kubernetes Node Addition Script
|
||||
# This script automates the process of adding new Azure VMs to an existing Kubernetes cluster
|
||||
|
||||
set -e # Exit on any error
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Configuration
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
INVENTORY_FILE="freeleaps-ops/cluster/ansible/manifests/inventory.ini"
|
||||
KUBESPRAY_DIR="freeleaps-ops/3rd/kubespray"
|
||||
ANSIBLE_USER="wwwadmin@mathmast.com"
|
||||
|
||||
# Function to print colored output
|
||||
print_status() {
|
||||
echo -e "${BLUE}[INFO]${NC} $1"
|
||||
}
|
||||
|
||||
print_success() {
|
||||
echo -e "${GREEN}[SUCCESS]${NC} $1"
|
||||
}
|
||||
|
||||
print_warning() {
|
||||
echo -e "${YELLOW}[WARNING]${NC} $1"
|
||||
}
|
||||
|
||||
print_error() {
|
||||
echo -e "${RED}[ERROR]${NC} $1"
|
||||
}
|
||||
|
||||
# Function to validate input
|
||||
validate_input() {
|
||||
if [[ -z "$1" ]]; then
|
||||
print_error "Input cannot be empty"
|
||||
return 1
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
|
||||
# Function to check prerequisites
|
||||
check_prerequisites() {
|
||||
print_status "Checking prerequisites..."
|
||||
|
||||
# Check if kubectl is installed
|
||||
if ! command -v kubectl &> /dev/null; then
|
||||
print_error "kubectl is not installed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check if ansible is installed
|
||||
if ! command -v ansible &> /dev/null; then
|
||||
print_error "ansible is not installed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check if az CLI is installed
|
||||
if ! command -v az &> /dev/null; then
|
||||
print_error "Azure CLI is not installed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check if inventory file exists
|
||||
if [[ ! -f "$INVENTORY_FILE" ]]; then
|
||||
print_error "Inventory file not found: $INVENTORY_FILE"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check if kubespray directory exists
|
||||
if [[ ! -d "$KUBESPRAY_DIR" ]]; then
|
||||
print_error "Kubespray directory not found: $KUBESPRAY_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
print_success "All prerequisites are met"
|
||||
}
|
||||
|
||||
# Function to get VM details from Azure
|
||||
get_vm_details() {
|
||||
local vm_name="$1"
|
||||
local resource_group="$2"
|
||||
|
||||
print_status "Getting VM details from Azure..."
|
||||
|
||||
# Get VM private IP
|
||||
local private_ip=$(az vm show --resource-group "$resource_group" --name "$vm_name" --query "privateIps" -o tsv 2>/dev/null)
|
||||
if [[ -z "$private_ip" ]]; then
|
||||
print_error "Failed to get private IP for VM: $vm_name"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Get VM power state
|
||||
local power_state=$(az vm show --resource-group "$resource_group" --name "$vm_name" --query "powerState" -o tsv 2>/dev/null)
|
||||
if [[ "$power_state" != "VM running" ]]; then
|
||||
print_warning "VM is not running. Current state: $power_state"
|
||||
read -p "Do you want to start the VM? (y/N): " -n 1 -r
|
||||
echo
|
||||
if [[ $REPLY =~ ^[Yy]$ ]]; then
|
||||
az vm start --resource-group "$resource_group" --name "$vm_name"
|
||||
print_status "Waiting for VM to start..."
|
||||
sleep 30
|
||||
else
|
||||
print_error "VM must be running to proceed"
|
||||
return 1
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "$private_ip"
|
||||
}
|
||||
|
||||
# Function to test SSH connectivity
|
||||
test_ssh_connectivity() {
|
||||
local ip_address="$1"
|
||||
|
||||
print_status "Testing SSH connectivity to $ip_address..."
|
||||
|
||||
# Test SSH connection
|
||||
if timeout 10 ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "$ANSIBLE_USER@$ip_address" "echo 'SSH connection successful'" 2>/dev/null; then
|
||||
print_success "SSH connectivity verified"
|
||||
return 0
|
||||
else
|
||||
print_error "SSH connection failed to $ip_address"
|
||||
print_warning "Please ensure:"
|
||||
print_warning "1. VM is running"
|
||||
print_warning "2. Network security group allows SSH (port 22)"
|
||||
print_warning "3. SSH service is running on the VM"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to update inventory file
|
||||
update_inventory() {
|
||||
local vm_name="$1"
|
||||
local ip_address="$2"
|
||||
local node_type="$3"
|
||||
|
||||
print_status "Updating inventory file..."
|
||||
|
||||
# Create backup of inventory file
|
||||
cp "$INVENTORY_FILE" "${INVENTORY_FILE}.backup.$(date +%Y%m%d_%H%M%S)"
|
||||
|
||||
# Add node to inventory based on type
|
||||
if [[ "$node_type" == "worker" ]]; then
|
||||
echo "$vm_name ansible_host=$ip_address ansible_user=$ANSIBLE_USER host_name=$vm_name" >> "$INVENTORY_FILE"
|
||||
print_success "Added worker node to inventory"
|
||||
elif [[ "$node_type" == "master" ]]; then
|
||||
echo "$vm_name ansible_host=$ip_address ansible_user=$ANSIBLE_USER etcd_member_name=${vm_name}-etcd host_name=$vm_name" >> "$INVENTORY_FILE"
|
||||
print_success "Added master node to inventory"
|
||||
else
|
||||
print_error "Invalid node type: $node_type"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to verify inventory
|
||||
verify_inventory() {
|
||||
print_status "Verifying inventory configuration..."
|
||||
|
||||
# Test inventory syntax
|
||||
if ansible-inventory -i "$INVENTORY_FILE" --list > /dev/null 2>&1; then
|
||||
print_success "Inventory syntax is valid"
|
||||
else
|
||||
print_error "Inventory syntax is invalid"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Test connectivity to all nodes
|
||||
print_status "Testing connectivity to all nodes..."
|
||||
if ansible -i "$INVENTORY_FILE" all -m ping -kK; then
|
||||
print_success "Connectivity to all nodes verified"
|
||||
else
|
||||
print_error "Connectivity test failed"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to run kubespray scale playbook
|
||||
run_scale_playbook() {
|
||||
print_status "Running Kubespray scale playbook..."
|
||||
|
||||
cd "$(dirname "$INVENTORY_FILE")"
|
||||
|
||||
# Run the scale playbook
|
||||
if ansible-playbook -i inventory.ini "$KUBESPRAY_DIR/scale.yml" -kK -b; then
|
||||
print_success "Scale playbook completed successfully"
|
||||
else
|
||||
print_error "Scale playbook failed"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to verify node addition
|
||||
verify_node_addition() {
|
||||
local vm_name="$1"
|
||||
|
||||
print_status "Verifying node addition..."
|
||||
|
||||
# Wait for node to appear
|
||||
local max_attempts=30
|
||||
local attempt=1
|
||||
|
||||
while [[ $attempt -le $max_attempts ]]; do
|
||||
if kubectl get nodes | grep -q "$vm_name"; then
|
||||
print_success "Node $vm_name found in cluster"
|
||||
break
|
||||
fi
|
||||
|
||||
print_status "Waiting for node to appear... (attempt $attempt/$max_attempts)"
|
||||
sleep 10
|
||||
((attempt++))
|
||||
done
|
||||
|
||||
if [[ $attempt -gt $max_attempts ]]; then
|
||||
print_error "Node $vm_name did not appear in cluster"
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Wait for node to be ready
|
||||
attempt=1
|
||||
while [[ $attempt -le $max_attempts ]]; do
|
||||
local node_status=$(kubectl get nodes "$vm_name" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null)
|
||||
if [[ "$node_status" == "True" ]]; then
|
||||
print_success "Node $vm_name is ready"
|
||||
break
|
||||
fi
|
||||
|
||||
print_status "Waiting for node to be ready... (attempt $attempt/$max_attempts)"
|
||||
sleep 10
|
||||
((attempt++))
|
||||
done
|
||||
|
||||
if [[ $attempt -gt $max_attempts ]]; then
|
||||
print_error "Node $vm_name is not ready"
|
||||
kubectl describe node "$vm_name"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to test pod scheduling
|
||||
test_pod_scheduling() {
|
||||
local vm_name="$1"
|
||||
|
||||
print_status "Testing pod scheduling on new node..."
|
||||
|
||||
# Create a test pod
|
||||
local test_pod_name="test-pod-$(date +%s)"
|
||||
kubectl run "$test_pod_name" --image=nginx --restart=Never --overrides="{\"spec\":{\"nodeSelector\":{\"kubernetes.io/hostname\":\"$vm_name\"}}}"
|
||||
|
||||
# Wait for pod to be scheduled
|
||||
local max_attempts=30
|
||||
local attempt=1
|
||||
|
||||
while [[ $attempt -le $max_attempts ]]; do
|
||||
local pod_status=$(kubectl get pod "$test_pod_name" -o jsonpath='{.status.phase}' 2>/dev/null)
|
||||
if [[ "$pod_status" == "Running" ]]; then
|
||||
print_success "Test pod is running on node $vm_name"
|
||||
break
|
||||
fi
|
||||
|
||||
print_status "Waiting for test pod to be ready... (attempt $attempt/$max_attempts)"
|
||||
sleep 10
|
||||
((attempt++))
|
||||
done
|
||||
|
||||
# Clean up test pod
|
||||
kubectl delete pod "$test_pod_name"
|
||||
|
||||
if [[ $attempt -gt $max_attempts ]]; then
|
||||
print_error "Test pod failed to run on node $vm_name"
|
||||
kubectl describe pod "$test_pod_name"
|
||||
kubectl delete pod "$test_pod_name"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to display final status
|
||||
display_final_status() {
|
||||
local vm_name="$1"
|
||||
|
||||
print_success "Node addition completed successfully!"
|
||||
echo
|
||||
echo "=== Final Status ==="
|
||||
echo "Node Name: $vm_name"
|
||||
echo "Node Status: $(kubectl get nodes "$vm_name" -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}')"
|
||||
echo "Node IP: $(kubectl get nodes "$vm_name" -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}')"
|
||||
echo "Node Capacity: $(kubectl get nodes "$vm_name" -o jsonpath='{.status.capacity.cpu}') CPU, $(kubectl get nodes "$vm_name" -o jsonpath='{.status.capacity.memory}') Memory"
|
||||
echo
|
||||
echo "=== Next Steps ==="
|
||||
echo "1. Monitor the node for any issues"
|
||||
echo "2. Update monitoring and alerting if needed"
|
||||
echo "3. Update documentation"
|
||||
echo "4. Consider running node maintenance tasks"
|
||||
}
|
||||
|
||||
# Main function
|
||||
main() {
|
||||
echo "=========================================="
|
||||
echo "Azure Kubernetes Node Addition Script"
|
||||
echo "=========================================="
|
||||
echo
|
||||
|
||||
# Check prerequisites
|
||||
check_prerequisites
|
||||
|
||||
# Get user input
|
||||
echo "Please provide the following information:"
|
||||
echo
|
||||
|
||||
read -p "VM Name: " vm_name
|
||||
validate_input "$vm_name" || exit 1
|
||||
|
||||
read -p "Resource Group: " resource_group
|
||||
validate_input "$resource_group" || exit 1
|
||||
|
||||
read -p "Node Type (worker/master): " node_type
|
||||
if [[ "$node_type" != "worker" && "$node_type" != "master" ]]; then
|
||||
print_error "Node type must be 'worker' or 'master'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo
|
||||
print_status "Summary:"
|
||||
echo " VM Name: $vm_name"
|
||||
echo " Resource Group: $resource_group"
|
||||
echo " Node Type: $node_type"
|
||||
echo
|
||||
|
||||
read -p "Proceed with node addition? (y/N): " -n 1 -r
|
||||
echo
|
||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
|
||||
print_status "Operation cancelled"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Get VM details
|
||||
ip_address=$(get_vm_details "$vm_name" "$resource_group")
|
||||
if [[ $? -ne 0 ]]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
print_success "VM IP Address: $ip_address"
|
||||
|
||||
# Test SSH connectivity
|
||||
test_ssh_connectivity "$ip_address" || exit 1
|
||||
|
||||
# Update inventory
|
||||
update_inventory "$vm_name" "$ip_address" "$node_type" || exit 1
|
||||
|
||||
# Verify inventory
|
||||
verify_inventory || exit 1
|
||||
|
||||
# Run scale playbook
|
||||
run_scale_playbook || exit 1
|
||||
|
||||
# Verify node addition
|
||||
verify_node_addition "$vm_name" || exit 1
|
||||
|
||||
# Test pod scheduling
|
||||
test_pod_scheduling "$vm_name" || exit 1
|
||||
|
||||
# Display final status
|
||||
display_final_status "$vm_name"
|
||||
}
|
||||
|
||||
# Handle script arguments
|
||||
if [[ $# -eq 0 ]]; then
|
||||
main
|
||||
else
|
||||
case "$1" in
|
||||
--help|-h)
|
||||
echo "Usage: $0 [OPTIONS]"
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " --help, -h Show this help message"
|
||||
echo " --version, -v Show version information"
|
||||
echo
|
||||
echo "This script automates the process of adding new Azure VMs to an existing Kubernetes cluster."
|
||||
echo "It will prompt for necessary information and guide you through the process."
|
||||
exit 0
|
||||
;;
|
||||
--version|-v)
|
||||
echo "Azure Kubernetes Node Addition Script v1.0"
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
print_error "Unknown option: $1"
|
||||
echo "Use --help for usage information"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
394
docs/bootstrap-k8s-cluster.sh
Executable file
@ -0,0 +1,394 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Freeleaps Kubernetes Cluster Bootstrap Script
|
||||
# This script bootstraps a complete Kubernetes cluster from Azure VMs
|
||||
|
||||
set -e
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Configuration
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
FREELEAPS_OPS_DIR="$(dirname "$SCRIPT_DIR")"
|
||||
INVENTORY_FILE="$FREELEAPS_OPS_DIR/cluster/ansible/manifests/inventory.ini"
|
||||
KUBESPRAY_DIR="$FREELEAPS_OPS_DIR/3rd/kubespray"
|
||||
MANIFESTS_DIR="$FREELEAPS_OPS_DIR/cluster/manifests"
|
||||
BIN_DIR="$FREELEAPS_OPS_DIR/cluster/bin"
|
||||
|
||||
# Function to print colored output
|
||||
print_status() {
|
||||
echo -e "${BLUE}[INFO]${NC} $1"
|
||||
}
|
||||
|
||||
print_success() {
|
||||
echo -e "${GREEN}[SUCCESS]${NC} $1"
|
||||
}
|
||||
|
||||
print_warning() {
|
||||
echo -e "${YELLOW}[WARNING]${NC} $1"
|
||||
}
|
||||
|
||||
print_error() {
|
||||
echo -e "${RED}[ERROR]${NC} $1"
|
||||
}
|
||||
|
||||
# Function to check prerequisites
|
||||
check_prerequisites() {
|
||||
print_status "Checking prerequisites..."
|
||||
|
||||
# Check if we're in the right directory
|
||||
if [[ ! -f "$INVENTORY_FILE" ]]; then
|
||||
print_error "Inventory file not found: $INVENTORY_FILE"
|
||||
print_error "Please run this script from the freeleaps-ops/docs directory"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check if kubespray exists
|
||||
if [[ ! -d "$KUBESPRAY_DIR" ]]; then
|
||||
print_error "Kubespray directory not found: $KUBESPRAY_DIR"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check required tools
|
||||
local missing_tools=()
|
||||
|
||||
if ! command -v ansible &> /dev/null; then
|
||||
missing_tools+=("ansible")
|
||||
fi
|
||||
|
||||
if ! command -v az &> /dev/null; then
|
||||
missing_tools+=("azure-cli")
|
||||
fi
|
||||
|
||||
if ! command -v kubectl &> /dev/null; then
|
||||
missing_tools+=("kubectl")
|
||||
fi
|
||||
|
||||
if [[ ${#missing_tools[@]} -gt 0 ]]; then
|
||||
print_error "Missing required tools: ${missing_tools[*]}"
|
||||
print_warning "Please install missing tools before proceeding"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
print_success "All prerequisites are met"
|
||||
}
|
||||
|
||||
# Function to verify Azure VMs
|
||||
verify_azure_vms() {
|
||||
print_status "Verifying Azure VMs..."
|
||||
|
||||
# Get VMs from inventory
|
||||
local vms=()
|
||||
while IFS= read -r line; do
|
||||
if [[ $line =~ ^[a-zA-Z0-9-]+ ]]; then
|
||||
vm_name=$(echo "$line" | awk '{print $1}')
|
||||
vms+=("$vm_name")
|
||||
fi
|
||||
done < "$INVENTORY_FILE"
|
||||
|
||||
print_status "Found VMs in inventory: ${vms[*]}"
|
||||
|
||||
# Check VM status in Azure
|
||||
for vm in "${vms[@]}"; do
|
||||
local power_state=$(az vm show --resource-group k8s --name "$vm" --query "powerState" -o tsv 2>/dev/null)
|
||||
if [[ "$power_state" != "VM running" ]]; then
|
||||
print_warning "VM $vm is not running (state: $power_state)"
|
||||
read -p "Do you want to start VM $vm? (y/N): " -n 1 -r
|
||||
echo
|
||||
if [[ $REPLY =~ ^[Yy]$ ]]; then
|
||||
az vm start --resource-group k8s --name "$vm"
|
||||
print_status "Starting VM $vm..."
|
||||
sleep 30
|
||||
fi
|
||||
else
|
||||
print_success "VM $vm is running"
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
# Function to test connectivity
|
||||
test_connectivity() {
|
||||
print_status "Testing connectivity to all VMs..."
|
||||
|
||||
cd "$(dirname "$INVENTORY_FILE")"
|
||||
|
||||
if ansible -i inventory.ini all -m ping -kK; then
|
||||
print_success "Connectivity to all VMs verified"
|
||||
else
|
||||
print_error "Connectivity test failed"
|
||||
print_warning "Please check:"
|
||||
print_warning "1. VMs are running"
|
||||
print_warning "2. Network security groups allow SSH (port 22)"
|
||||
print_warning "3. SSH credentials are correct"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to bootstrap Kubernetes cluster
|
||||
bootstrap_cluster() {
|
||||
print_status "Bootstrapping Kubernetes cluster..."
|
||||
|
||||
cd "$KUBESPRAY_DIR"
|
||||
|
||||
print_status "Running Kubespray cluster installation..."
|
||||
print_warning "This process may take 15-30 minutes..."
|
||||
|
||||
if ansible-playbook -i ../../cluster/ansible/manifests/inventory.ini ./cluster.yml -kK -b; then
|
||||
print_success "Kubernetes cluster bootstrapped successfully"
|
||||
else
|
||||
print_error "Cluster bootstrap failed"
|
||||
print_warning "Check the Ansible output for errors"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to get kubeconfig
|
||||
get_kubeconfig() {
|
||||
print_status "Retrieving kubeconfig..."
|
||||
|
||||
# Get the first master node IP
|
||||
local master_ip=$(grep -A 10 "\[kube_control_plane\]" "$INVENTORY_FILE" | grep ansible_host | head -1 | awk '{print $2}' | cut -d'=' -f2)
|
||||
|
||||
if [[ -z "$master_ip" ]]; then
|
||||
print_error "Could not find master node IP in inventory"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
print_status "Getting kubeconfig from master node: $master_ip"
|
||||
|
||||
# Create .kube directory if it doesn't exist
|
||||
mkdir -p ~/.kube
|
||||
|
||||
# Get kubeconfig from master node
|
||||
ssh wwwadmin@mathmast.com@"$master_ip" "sudo cat /etc/kubernetes/admin.conf" > ~/.kube/config
|
||||
|
||||
if [[ $? -eq 0 ]]; then
|
||||
print_success "Kubeconfig retrieved successfully"
|
||||
else
|
||||
print_error "Failed to retrieve kubeconfig"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to verify cluster
|
||||
verify_cluster() {
|
||||
print_status "Verifying cluster installation..."
|
||||
|
||||
# Wait for cluster to be ready
|
||||
local max_attempts=30
|
||||
local attempt=1
|
||||
|
||||
while [[ $attempt -le $max_attempts ]]; do
|
||||
if kubectl get nodes &> /dev/null; then
|
||||
print_success "Cluster is accessible"
|
||||
break
|
||||
fi
|
||||
|
||||
print_status "Waiting for cluster to be ready... (attempt $attempt/$max_attempts)"
|
||||
sleep 30
|
||||
((attempt++))
|
||||
done
|
||||
|
||||
if [[ $attempt -gt $max_attempts ]]; then
|
||||
print_error "Cluster verification failed"
|
||||
print_warning "Troubleshooting steps:"
|
||||
print_warning "1. Check VM resources (CPU, memory)"
|
||||
print_warning "2. Check network connectivity between nodes"
|
||||
print_warning "3. Check Ansible logs for errors"
|
||||
print_warning "4. Verify inventory file configuration"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check node status
|
||||
print_status "Checking node status..."
|
||||
kubectl get nodes
|
||||
|
||||
# Wait for all nodes to be ready
|
||||
local ready_nodes=$(kubectl get nodes --no-headers | grep -c "Ready")
|
||||
local total_nodes=$(kubectl get nodes --no-headers | wc -l)
|
||||
|
||||
if [[ $ready_nodes -eq $total_nodes ]]; then
|
||||
print_success "All $total_nodes nodes are ready"
|
||||
else
|
||||
print_warning "Only $ready_nodes/$total_nodes nodes are ready"
|
||||
kubectl get nodes
|
||||
fi
|
||||
|
||||
# Check system pods
|
||||
print_status "Checking system pods..."
|
||||
kubectl get pods -n kube-system
|
||||
|
||||
# Wait for critical system pods
|
||||
print_status "Waiting for critical system pods..."
|
||||
local critical_pods=("kube-apiserver" "kube-controller-manager" "kube-scheduler" "etcd")
|
||||
|
||||
for pod_prefix in "${critical_pods[@]}"; do
|
||||
local max_pod_attempts=20
|
||||
local pod_attempt=1
|
||||
|
||||
while [[ $pod_attempt -le $max_pod_attempts ]]; do
|
||||
if kubectl get pods -n kube-system | grep -q "$pod_prefix.*Running"; then
|
||||
print_success "$pod_prefix is running"
|
||||
break
|
||||
fi
|
||||
|
||||
if [[ $pod_attempt -eq $max_pod_attempts ]]; then
|
||||
print_warning "$pod_prefix is not running"
|
||||
kubectl get pods -n kube-system | grep "$pod_prefix"
|
||||
fi
|
||||
|
||||
sleep 10
|
||||
((pod_attempt++))
|
||||
done
|
||||
done
|
||||
|
||||
# Check cluster info
|
||||
print_status "Checking cluster info..."
|
||||
kubectl cluster-info
|
||||
}
|
||||
|
||||
# Function to deploy infrastructure
|
||||
deploy_infrastructure() {
|
||||
print_status "Deploying infrastructure components..."
|
||||
|
||||
cd "$MANIFESTS_DIR"
|
||||
|
||||
# Deploy in order
|
||||
local components=(
|
||||
"freeleaps-controls-system"
|
||||
"freeleaps-devops-system"
|
||||
"freeleaps-monitoring-system"
|
||||
"freeleaps-logging-system"
|
||||
"freeleaps-data-platform"
|
||||
)
|
||||
|
||||
for component in "${components[@]}"; do
|
||||
if [[ -d "$component" ]]; then
|
||||
print_status "Deploying $component..."
|
||||
kubectl apply -f "$component/"
|
||||
|
||||
# Wait for deployment to stabilize
|
||||
print_status "Waiting for $component to stabilize..."
|
||||
sleep 30
|
||||
else
|
||||
print_warning "Component directory not found: $component"
|
||||
fi
|
||||
done
|
||||
|
||||
print_success "Infrastructure deployment completed"
|
||||
}
|
||||
|
||||
# Function to setup authentication
|
||||
setup_authentication() {
|
||||
print_status "Setting up authentication..."
|
||||
|
||||
cd "$BIN_DIR"
|
||||
|
||||
if [[ -f "freeleaps-cluster-authenticator" ]]; then
|
||||
print_status "Running authentication setup..."
|
||||
./freeleaps-cluster-authenticator auth
|
||||
else
|
||||
print_warning "Authentication script not found"
|
||||
print_warning "Please run authentication setup manually"
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to display final status
|
||||
display_final_status() {
|
||||
print_success "Kubernetes cluster bootstrap completed!"
|
||||
echo
|
||||
echo "=== Cluster Status ==="
|
||||
kubectl get nodes
|
||||
echo
|
||||
echo "=== System Pods ==="
|
||||
kubectl get pods -n kube-system
|
||||
echo
|
||||
echo "=== Infrastructure Status ==="
|
||||
kubectl get pods --all-namespaces | grep -E "(argocd|cert-manager|prometheus|grafana)"
|
||||
echo
|
||||
echo "=== Next Steps ==="
|
||||
echo "1. Verify all components are running: kubectl get pods --all-namespaces"
|
||||
echo "2. Access ArgoCD: kubectl port-forward svc/argocd-server -n freeleaps-devops-system 8080:80"
|
||||
echo "3. Access Grafana: kubectl port-forward svc/kube-prometheus-stack-grafana -n freeleaps-monitoring-system 3000:80"
|
||||
echo "4. Setup authentication: cd $BIN_DIR && ./freeleaps-cluster-authenticator auth"
|
||||
echo "5. Deploy applications via ArgoCD"
|
||||
}
|
||||
|
||||
# Main function
|
||||
main() {
|
||||
echo "=========================================="
|
||||
echo "Freeleaps Kubernetes Cluster Bootstrap"
|
||||
echo "=========================================="
|
||||
echo
|
||||
|
||||
# Check prerequisites
|
||||
check_prerequisites
|
||||
|
||||
# Verify Azure VMs
|
||||
verify_azure_vms
|
||||
|
||||
# Test connectivity
|
||||
test_connectivity
|
||||
|
||||
# Bootstrap cluster
|
||||
bootstrap_cluster
|
||||
|
||||
# Get kubeconfig
|
||||
get_kubeconfig
|
||||
|
||||
# Verify cluster
|
||||
verify_cluster
|
||||
|
||||
# Deploy infrastructure
|
||||
deploy_infrastructure
|
||||
|
||||
# Setup authentication
|
||||
setup_authentication
|
||||
|
||||
# Display final status
|
||||
display_final_status
|
||||
}
|
||||
|
||||
# Handle script arguments
|
||||
if [[ $# -eq 0 ]]; then
|
||||
main
|
||||
else
|
||||
case "$1" in
|
||||
--help|-h)
|
||||
echo "Usage: $0 [OPTIONS]"
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " --help, -h Show this help message"
|
||||
echo " --verify Only verify prerequisites and connectivity"
|
||||
echo " --bootstrap Only bootstrap the cluster (skip infrastructure)"
|
||||
echo
|
||||
echo "This script bootstraps a complete Kubernetes cluster from Azure VMs."
|
||||
exit 0
|
||||
;;
|
||||
--verify)
|
||||
check_prerequisites
|
||||
verify_azure_vms
|
||||
test_connectivity
|
||||
print_success "Verification completed successfully"
|
||||
;;
|
||||
--bootstrap)
|
||||
check_prerequisites
|
||||
verify_azure_vms
|
||||
test_connectivity
|
||||
bootstrap_cluster
|
||||
get_kubeconfig
|
||||
verify_cluster
|
||||
print_success "Cluster bootstrap completed"
|
||||
;;
|
||||
*)
|
||||
print_error "Unknown option: $1"
|
||||
echo "Use --help for usage information"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
141
docs/examples/basic-pod.yaml
Normal file
@ -0,0 +1,141 @@
|
||||
# Basic Pod Example with Detailed Comments
|
||||
# This example shows a simple nginx pod with health checks and resource limits
|
||||
#
|
||||
# 🎯 What this does: Creates a single nginx web server pod that:
|
||||
# - Runs nginx web server on port 80
|
||||
# - Has health checks to ensure it's working
|
||||
# - Has resource limits to prevent it from consuming too much CPU/memory
|
||||
# - Includes security best practices
|
||||
#
|
||||
# 📚 EDUCATIONAL EXAMPLE (not from your codebase)
|
||||
# This is a learning example. Your codebase uses Helm charts and Deployments instead of direct Pods.
|
||||
#
|
||||
# ⚠️ IMPORTANT: Direct Pod creation is NOT good practice for production!
|
||||
# This example is for learning purposes only. In production, you should use:
|
||||
# - Deployments (for applications)
|
||||
# - StatefulSets (for databases)
|
||||
# - Helm charts (for complex applications)
|
||||
# - kubectl apply (for declarative deployments)
|
||||
|
||||
apiVersion: v1 # ← Kubernetes API version for Pod resources
|
||||
kind: Pod # ← Resource type: Pod (smallest deployable unit)
|
||||
metadata: # ← Metadata section: describes the pod
|
||||
name: nginx-pod # ← Unique name for this pod in the namespace
|
||||
namespace: default # ← Namespace where pod will be created (default if not specified)
|
||||
labels: # ← Labels for organizing and selecting pods
|
||||
app: nginx # ← Label: identifies this as an nginx application
|
||||
version: v1 # ← Label: version of the application
|
||||
environment: development # ← Label: environment this pod runs in
|
||||
spec: # ← Specification: defines what the pod should do
|
||||
containers: # ← List of containers in this pod
|
||||
- name: nginx # ← Container name (used for logs, exec, etc.)
|
||||
image: nginx:latest # ← Docker image to run (nginx with latest tag)
|
||||
ports: # ← Ports the container exposes
|
||||
- containerPort: 80 # ← Port 80 inside the container (nginx default)
|
||||
name: http # ← Name for this port (useful for service references)
|
||||
protocol: TCP # ← Protocol (TCP is default)
|
||||
|
||||
# 🔧 Resource Management
|
||||
# These limits prevent the pod from consuming too many resources
|
||||
# Think of it like setting a budget for CPU and memory usage
|
||||
resources:
|
||||
requests: # ← Minimum resources guaranteed to the pod
|
||||
memory: "64Mi" # ← 64 megabytes of RAM (minimum guaranteed)
|
||||
cpu: "250m" # ← 0.25 CPU cores (250 millicores = 25% of 1 CPU)
|
||||
limits: # ← Maximum resources the pod can use
|
||||
memory: "128Mi" # ← 128 megabytes of RAM (maximum allowed)
|
||||
cpu: "500m" # ← 0.5 CPU cores (500 millicores = 50% of 1 CPU)
|
||||
|
||||
# 🏥 Health Checks
|
||||
# These tell Kubernetes how to check if the pod is healthy
|
||||
# Like a doctor checking your vital signs!
|
||||
livenessProbe: # ← Checks if the pod is alive (restarts if failed)
|
||||
httpGet: # ← Use HTTP GET request to check health
|
||||
path: / # ← Check the root path of nginx
|
||||
port: 80 # ← Check on port 80
|
||||
initialDelaySeconds: 30 # ← Wait 30 seconds before first check (nginx startup time)
|
||||
periodSeconds: 10 # ← Check every 10 seconds
|
||||
timeoutSeconds: 5 # ← Fail if response takes longer than 5 seconds
|
||||
failureThreshold: 3 # ← Restart pod after 3 consecutive failures
|
||||
|
||||
readinessProbe: # ← Checks if the pod is ready to receive traffic
|
||||
httpGet: # ← Use HTTP GET request to check readiness
|
||||
path: / # ← Check the root path
|
||||
port: 80 # ← Check on port 80
|
||||
initialDelaySeconds: 5 # ← Wait 5 seconds before first check
|
||||
periodSeconds: 5 # ← Check every 5 seconds
|
||||
timeoutSeconds: 3 # ← Fail if response takes longer than 3 seconds
|
||||
failureThreshold: 3 # ← Mark as not ready after 3 consecutive failures
|
||||
|
||||
# 🔒 Security Context
|
||||
# These settings make the pod more secure
|
||||
# Like locking your doors and windows!
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false # ← Prevent the container from gaining root privileges
|
||||
readOnlyRootFilesystem: true # ← Make the root filesystem read-only (more secure)
|
||||
capabilities: # ← Remove unnecessary Linux capabilities
|
||||
drop: # ← Drop these capabilities
|
||||
- ALL # ← Drop ALL capabilities (most restrictive)
|
||||
runAsNonRoot: true # ← Don't run as root user
|
||||
runAsUser: 101 # ← Run as user ID 101 (nginx user)
|
||||
|
||||
# 📁 Volume Mounts
|
||||
# These allow the container to access files from the pod
|
||||
volumeMounts:
|
||||
- name: tmp-volume # ← Name of the volume to mount
|
||||
mountPath: /tmp # ← Where to mount it inside the container
|
||||
readOnly: false # ← Allow read/write access
|
||||
|
||||
# 💾 Volumes
|
||||
# These define storage that can be mounted into containers
|
||||
volumes:
|
||||
- name: tmp-volume # ← Volume name (matches volumeMounts above)
|
||||
emptyDir: {} # ← Empty directory volume (temporary, deleted when pod dies)
|
||||
# emptyDir creates a temporary directory that exists as long as the pod exists
|
||||
# Perfect for temporary files, caches, etc.
|
||||
|
||||
# 🚀 How to use this (FOR LEARNING ONLY):
|
||||
# kubectl apply -f basic-pod.yaml
|
||||
# kubectl get pods # Check if pod is running
|
||||
# kubectl logs nginx-pod # View nginx logs
|
||||
# kubectl port-forward nginx-pod 8080:80 # Access nginx at http://localhost:8080
|
||||
# kubectl exec -it nginx-pod -- /bin/bash # Get a shell inside the pod
|
||||
|
||||
# 🏭 YOUR CODEBASE COMPARISON:
|
||||
#
|
||||
# ❌ Your codebase does NOT create Pods directly like this
|
||||
# ✅ Your codebase uses Helm charts and Deployments instead
|
||||
#
|
||||
# Example from your codebase:
|
||||
# - Helm charts in: freeleaps-ops/freeleaps/helm-pkg/
|
||||
# - Deployments with replicas, rolling updates, etc.
|
||||
# - Automatic pod creation via Deployment controllers
|
||||
#
|
||||
# Commands your codebase actually uses:
|
||||
# helm install/upgrade <release> <chart> --namespace <namespace> -f <values.yaml>
|
||||
# kubectl get pods -n <namespace> -l app.kubernetes.io/name=<app-name>
|
||||
|
||||
# 🎯 PRODUCTION BEST PRACTICES:
|
||||
#
|
||||
# ❌ DON'T DO THIS (bad practices):
|
||||
# kubectl run nginx --image=nginx:latest # Creates standalone Pod
|
||||
# kubectl run my-app --image=my-app:latest --port=8080 # No self-healing
|
||||
# kubectl run database --image=postgres:13 --port=5432 # No scaling
|
||||
#
|
||||
# ✅ DO THIS INSTEAD (good practices):
|
||||
# kubectl create deployment nginx --image=nginx:latest # Creates Deployment
|
||||
# helm install my-app ./my-app-chart --namespace my-app # Use Helm charts
|
||||
# kubectl apply -f deployment.yaml # Declarative deployment
|
||||
# kubectl apply -f statefulset.yaml # For databases
|
||||
#
|
||||
# 🔧 When kubectl run is OK (limited use cases):
|
||||
# kubectl run debug-pod --image=busybox --rm -it --restart=Never -- nslookup my-service
|
||||
# kubectl run test-pod --image=nginx --rm -it --restart=Never -- curl http://my-service:80
|
||||
|
||||
# 📚 Learn more:
|
||||
# - Pods: https://kubernetes.io/docs/concepts/workloads/pods/
|
||||
# - Deployments: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/
|
||||
# - Helm: https://helm.sh/docs/
|
||||
# - Health Checks: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
|
||||
# - Security Context: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
|
||||
# - Resource Management: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
|
||||
468
docs/examples/complete-application-example.yaml
Normal file
@ -0,0 +1,468 @@
|
||||
# Complete Application Example
|
||||
# This demonstrates a full web application with database, API, and monitoring
|
||||
|
||||
# 1. Namespace
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: complete-app
|
||||
labels:
|
||||
environment: production
|
||||
team: backend
|
||||
app: complete-app
|
||||
---
|
||||
# 2. ConfigMap for application configuration
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: app-config
|
||||
namespace: complete-app
|
||||
data:
|
||||
DB_HOST: "postgres-service"
|
||||
DB_PORT: "5432"
|
||||
DB_NAME: "myapp"
|
||||
REDIS_HOST: "redis-service"
|
||||
REDIS_PORT: "6379"
|
||||
ENVIRONMENT: "production"
|
||||
LOG_LEVEL: "INFO"
|
||||
|
||||
application.properties: |
|
||||
server.port=8080
|
||||
logging.level=INFO
|
||||
cache.enabled=true
|
||||
session.timeout=3600
|
||||
cors.allowed-origins=*
|
||||
---
|
||||
# 3. Secret for sensitive data
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: app-secrets
|
||||
namespace: complete-app
|
||||
type: Opaque
|
||||
data:
|
||||
DB_USERNAME: YWRtaW4= # admin
|
||||
DB_PASSWORD: c2VjcmV0MTIz # secret123
|
||||
API_KEY: bXktYXBpLWtleQ== # my-api-key
|
||||
JWT_SECRET: bXktand0LXNlY3JldA== # my-jwt-secret
|
||||
---
|
||||
# 4. PVC for database
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: postgres-pvc
|
||||
namespace: complete-app
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
||||
storageClassName: managed-premium
|
||||
---
|
||||
# 5. PVC for application data
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: app-data-pvc
|
||||
namespace: complete-app
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 5Gi
|
||||
storageClassName: managed-premium
|
||||
---
|
||||
# 6. PostgreSQL Database Deployment
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: postgres
|
||||
namespace: complete-app
|
||||
labels:
|
||||
app: postgres
|
||||
component: database
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: postgres
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: postgres
|
||||
component: database
|
||||
spec:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
fsGroup: 999
|
||||
containers:
|
||||
- name: postgres
|
||||
image: postgres:13
|
||||
ports:
|
||||
- containerPort: 5432
|
||||
env:
|
||||
- name: POSTGRES_DB
|
||||
value: "myapp"
|
||||
- name: POSTGRES_USER
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: app-secrets
|
||||
key: DB_USERNAME
|
||||
- name: POSTGRES_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: app-secrets
|
||||
key: DB_PASSWORD
|
||||
volumeMounts:
|
||||
- name: postgres-data
|
||||
mountPath: /var/lib/postgresql/data
|
||||
resources:
|
||||
requests:
|
||||
memory: "256Mi"
|
||||
cpu: "250m"
|
||||
limits:
|
||||
memory: "512Mi"
|
||||
cpu: "500m"
|
||||
livenessProbe:
|
||||
exec:
|
||||
command:
|
||||
- pg_isready
|
||||
- -U
|
||||
- admin
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 10
|
||||
readinessProbe:
|
||||
exec:
|
||||
command:
|
||||
- pg_isready
|
||||
- -U
|
||||
- admin
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
volumes:
|
||||
- name: postgres-data
|
||||
persistentVolumeClaim:
|
||||
claimName: postgres-pvc
|
||||
---
|
||||
# 7. Redis Cache Deployment
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: redis
|
||||
namespace: complete-app
|
||||
labels:
|
||||
app: redis
|
||||
component: cache
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: redis
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: redis
|
||||
component: cache
|
||||
spec:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
fsGroup: 999
|
||||
containers:
|
||||
- name: redis
|
||||
image: redis:6-alpine
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
resources:
|
||||
requests:
|
||||
memory: "128Mi"
|
||||
cpu: "100m"
|
||||
limits:
|
||||
memory: "256Mi"
|
||||
cpu: "200m"
|
||||
livenessProbe:
|
||||
exec:
|
||||
command:
|
||||
- redis-cli
|
||||
- ping
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 10
|
||||
readinessProbe:
|
||||
exec:
|
||||
command:
|
||||
- redis-cli
|
||||
- ping
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
---
|
||||
# 8. Web Application Deployment
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: web-app
|
||||
namespace: complete-app
|
||||
labels:
|
||||
app: web-app
|
||||
component: frontend
|
||||
spec:
|
||||
replicas: 3
|
||||
selector:
|
||||
matchLabels:
|
||||
app: web-app
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: web-app
|
||||
component: frontend
|
||||
spec:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
fsGroup: 2000
|
||||
containers:
|
||||
- name: web-app
|
||||
image: nginx:latest
|
||||
ports:
|
||||
- containerPort: 80
|
||||
resources:
|
||||
requests:
|
||||
memory: "64Mi"
|
||||
cpu: "100m"
|
||||
limits:
|
||||
memory: "128Mi"
|
||||
cpu: "200m"
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: 80
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 10
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: 80
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
volumeMounts:
|
||||
- name: tmp-volume
|
||||
mountPath: /tmp
|
||||
volumes:
|
||||
- name: tmp-volume
|
||||
emptyDir: {}
|
||||
---
|
||||
# 9. API Application Deployment
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: api-app
|
||||
namespace: complete-app
|
||||
labels:
|
||||
app: api-app
|
||||
component: backend
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: api-app
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: api-app
|
||||
component: backend
|
||||
spec:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
fsGroup: 2000
|
||||
containers:
|
||||
- name: api-app
|
||||
image: python:3.9-slim
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
env:
|
||||
- name: DB_HOST
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: app-config
|
||||
key: DB_HOST
|
||||
- name: DB_PORT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: app-config
|
||||
key: DB_PORT
|
||||
- name: DB_NAME
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: app-config
|
||||
key: DB_NAME
|
||||
- name: DB_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: app-secrets
|
||||
key: DB_USERNAME
|
||||
- name: DB_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: app-secrets
|
||||
key: DB_PASSWORD
|
||||
- name: REDIS_HOST
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: app-config
|
||||
key: REDIS_HOST
|
||||
- name: REDIS_PORT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: app-config
|
||||
key: REDIS_PORT
|
||||
- name: API_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: app-secrets
|
||||
key: API_KEY
|
||||
- name: JWT_SECRET
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: app-secrets
|
||||
key: JWT_SECRET
|
||||
volumeMounts:
|
||||
- name: app-data
|
||||
mountPath: /app/data
|
||||
- name: config-volume
|
||||
mountPath: /app/config
|
||||
resources:
|
||||
requests:
|
||||
memory: "256Mi"
|
||||
cpu: "250m"
|
||||
limits:
|
||||
memory: "512Mi"
|
||||
cpu: "500m"
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 8080
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 10
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /ready
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
volumes:
|
||||
- name: app-data
|
||||
persistentVolumeClaim:
|
||||
claimName: app-data-pvc
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: app-config
|
||||
---
|
||||
# 10. Services
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: postgres-service
|
||||
namespace: complete-app
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: postgres
|
||||
ports:
|
||||
- port: 5432
|
||||
targetPort: 5432
|
||||
protocol: TCP
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: redis-service
|
||||
namespace: complete-app
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: redis
|
||||
ports:
|
||||
- port: 6379
|
||||
targetPort: 6379
|
||||
protocol: TCP
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: web-app-service
|
||||
namespace: complete-app
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: web-app
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: api-app-service
|
||||
namespace: complete-app
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: api-app
|
||||
ports:
|
||||
- port: 8080
|
||||
targetPort: 8080
|
||||
protocol: TCP
|
||||
---
|
||||
# 11. Ingress
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: complete-app-ingress
|
||||
namespace: complete-app
|
||||
annotations:
|
||||
nginx.ingress.kubernetes.io/rewrite-target: /
|
||||
cert-manager.io/cluster-issuer: "letsencrypt-prod"
|
||||
nginx.ingress.kubernetes.io/enable-cors: "true"
nginx.ingress.kubernetes.io/cors-allow-origin: "*"
|
||||
spec:
|
||||
tls:
|
||||
- hosts:
|
||||
- myapp.example.com
|
||||
- api.myapp.example.com
|
||||
secretName: myapp-tls
|
||||
rules:
|
||||
- host: myapp.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: web-app-service
|
||||
port:
|
||||
number: 80
|
||||
- host: api.myapp.example.com
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: api-app-service
|
||||
port:
|
||||
number: 8080
|
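# 🚀 How to try this stack (a minimal sketch; the images above are generic
# placeholders, so the nginx/python containers may need a real application
# image, a command, or extra writable mounts before their probes pass):
#   kubectl apply -f complete-app.yaml              # file name assumed; use whatever you saved this as
#   kubectl get pods -n complete-app -w             # watch postgres, redis, web-app and api-app come up
#   kubectl get svc,ingress -n complete-app         # confirm the Services and the Ingress exist
#   kubectl logs -n complete-app deployment/api-app # check the API picked up its ConfigMap/Secret values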
||||
100
docs/examples/configmap-secret-example.yaml
Normal file
@ -0,0 +1,100 @@
|
||||
# ConfigMap for application configuration
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: app-config
|
||||
namespace: my-app
|
||||
labels:
|
||||
app: web-app
|
||||
data:
|
||||
# Environment variables
|
||||
DB_HOST: "postgres-service"
|
||||
DB_PORT: "5432"
|
||||
ENVIRONMENT: "production"
|
||||
LOG_LEVEL: "INFO"
|
||||
|
||||
# File-like content
|
||||
application.properties: |
|
||||
server.port=8080
|
||||
logging.level=INFO
|
||||
cache.enabled=true
|
||||
session.timeout=3600
|
||||
---
|
||||
# Secret for sensitive data
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: db-secret
|
||||
namespace: my-app
|
||||
labels:
|
||||
app: web-app
|
||||
type: Opaque
|
||||
data:
|
||||
# Base64 encoded values
|
||||
DB_USERNAME: YWRtaW4= # admin
|
||||
DB_PASSWORD: c2VjcmV0MTIz # secret123
|
||||
API_KEY: bXktYXBpLWtleQ== # my-api-key
|
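# A quick sketch of how the values above can be produced and verified
# (assumes a POSIX shell; -n stops echo from appending a newline):
#   echo -n 'admin' | base64        # -> YWRtaW4=
#   echo 'YWRtaW4=' | base64 -d     # -> admin
# Or let kubectl do the encoding:
#   kubectl create secret generic db-secret -n my-app \
#     --from-literal=DB_USERNAME=admin \
#     --from-literal=DB_PASSWORD=secret123 \
#     --dry-run=client -o yaml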
||||
---
|
||||
# Deployment using ConfigMap and Secret
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: web-app-with-config
|
||||
namespace: my-app
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: web-app
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: web-app
|
||||
spec:
|
||||
containers:
|
||||
- name: web-app
|
||||
image: nginx:latest
|
||||
ports:
|
||||
- containerPort: 80
|
||||
env:
|
||||
# Environment variables from ConfigMap
|
||||
- name: DB_HOST
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: app-config
|
||||
key: DB_HOST
|
||||
- name: DB_PORT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: app-config
|
||||
key: DB_PORT
|
||||
- name: ENVIRONMENT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: app-config
|
||||
key: ENVIRONMENT
|
||||
# Environment variables from Secret
|
||||
- name: DB_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: db-secret
|
||||
key: DB_USERNAME
|
||||
- name: DB_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: db-secret
|
||||
key: DB_PASSWORD
|
||||
volumeMounts:
|
||||
# Mount ConfigMap as files
|
||||
- name: config-volume
|
||||
mountPath: /app/config
|
||||
- name: secret-volume
|
||||
mountPath: /app/secrets
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: app-config
|
||||
- name: secret-volume
|
||||
secret:
|
||||
secretName: db-secret
|
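# How to check the wiring once this file is applied (a sketch; adjust names if
# you changed them):
#   kubectl apply -f configmap-secret-example.yaml
#   kubectl exec -n my-app deploy/web-app-with-config -- env | grep -E 'DB_|ENVIRONMENT'
#   kubectl exec -n my-app deploy/web-app-with-config -- ls /app/config /app/secrets
#   kubectl get configmap app-config -n my-app -o yaml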
||||
158
docs/examples/deployment-example.yaml
Normal file
@ -0,0 +1,158 @@
|
||||
# Production-Ready Deployment Example with Detailed Comments
|
||||
# This example shows a deployment that creates and manages multiple nginx pods
|
||||
#
|
||||
# 🎯 What this does: Creates a deployment that:
|
||||
# - Runs 3 copies of nginx web server (replicas)
|
||||
# - Automatically restarts failed pods
|
||||
# - Supports rolling updates (zero downtime)
|
||||
# - Includes security, health checks, and resource management
|
||||
# - Can be easily scaled up or down
|
||||
|
||||
# 📊 ASCII Diagram: How Deployments Work
|
||||
#
|
||||
# ┌─────────────────────────────────────────────────────────────┐
|
||||
# │ DEPLOYMENT │
|
||||
# │ ┌─────────────────────────────────────────────────────┐ │
|
||||
# │ │ name: web-app │ │
|
||||
# │ │ replicas: 3 │ │
|
||||
# │ └─────────────────────────────────────────────────────┘ │
|
||||
# │ │ │
|
||||
# │ ▼ │
|
||||
# │ ┌─────────────────────────────────────────────────────┐ │
|
||||
# │ │ POD TEMPLATE │ │
|
||||
# │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||
# │ │ │ POD 1 │ │ POD 2 │ │ POD 3 │ │ │
|
||||
# │ │ │ nginx:latest│ │ nginx:latest│ │ nginx:latest│ │ │
|
||||
# │ │ │ port: 80 │ │ port: 80 │ │ port: 80 │ │ │
|
||||
# │ │ │ IP: 10.0.1.1│ │ IP: 10.0.1.2│ │ IP: 10.0.1.3│ │ │
|
||||
# │ │ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||
# │ └─────────────────────────────────────────────────────┘ │
|
||||
# └─────────────────────────────────────────────────────────────┘
|
||||
#
|
||||
# 🔄 Rolling Update Process:
|
||||
# ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
# │ OLD POD │ │ NEW POD │ │ OLD POD │
|
||||
# │ nginx:v1.0 │ │ nginx:v1.1 │ │ nginx:v1.0 │
|
||||
# └─────────────┘ └─────────────┘ └─────────────┘
|
||||
# │ │ │
|
||||
# ▼ ▼ ▼
|
||||
# ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
# │ NEW POD │ │ NEW POD │ │ NEW POD │
|
||||
# │ nginx:v1.1 │ │ nginx:v1.1 │ │ nginx:v1.1 │
|
||||
# └─────────────┘ └─────────────┘ └─────────────┘
|
||||
|
||||
apiVersion: apps/v1 # ← Kubernetes API version for Deployment resources
|
||||
kind: Deployment # ← Resource type: Deployment (manages multiple pods)
|
||||
metadata: # ← Metadata section: describes the deployment
|
||||
name: web-app # ← Unique name for this deployment
|
||||
namespace: my-app # ← Namespace where deployment will be created
|
||||
labels: # ← Labels for organizing and selecting deployments
|
||||
app: web-app # ← Label: identifies this as a web application
|
||||
version: v1 # ← Label: version of the application
|
||||
environment: production # ← Label: environment this runs in
|
||||
team: backend # ← Label: team responsible for this app
|
||||
spec: # ← Specification: defines what the deployment should do
|
||||
replicas: 3 # ← Number of pod copies to run (3 nginx instances)
|
||||
# Think of replicas like having 3 backup singers - if one gets sick,
|
||||
# the show goes on with the other 2!
|
||||
|
||||
selector: # ← How to find the pods this deployment manages
|
||||
matchLabels: # ← Match pods with these labels
|
||||
app: web-app # ← Only manage pods with label app=web-app
|
||||
|
||||
template: # ← Template for creating new pods
|
||||
metadata: # ← Metadata for pods created from this template
|
||||
labels: # ← Labels applied to all pods created by this deployment
|
||||
app: web-app # ← Must match selector above
|
||||
version: v1 # ← Version label for tracking
|
||||
environment: production # ← Environment label
|
||||
team: backend # ← Team label
|
||||
|
||||
spec: # ← Pod specification (same as basic-pod.yaml)
|
||||
# 🔒 Pod-Level Security Context
|
||||
# These settings apply to the entire pod
|
||||
securityContext:
|
||||
runAsNonRoot: true # ← Don't run any container as root
|
||||
runAsUser: 1000 # ← Run as user ID 1000
|
||||
fsGroup: 2000 # ← Set group ID for mounted volumes
|
||||
|
||||
containers: # ← List of containers in each pod
|
||||
- name: web-app # ← Container name
|
||||
image: nginx:latest # ← Docker image to run
|
||||
ports: # ← Ports the container exposes
|
||||
- containerPort: 80 # ← Port 80 inside the container
|
||||
name: http # ← Name for this port
|
||||
# 🔧 Resource Management
|
||||
# These limits prevent pods from consuming too many resources
|
||||
# Like setting a budget for each pod
|
||||
resources:
|
||||
requests: # ← Minimum resources guaranteed to each pod
|
||||
memory: "64Mi" # ← 64 megabytes of RAM (minimum guaranteed)
|
||||
cpu: "250m" # ← 0.25 CPU cores (250 millicores = 25% of 1 CPU)
|
||||
limits: # ← Maximum resources each pod can use
|
||||
memory: "128Mi" # ← 128 megabytes of RAM (maximum allowed)
|
||||
cpu: "500m" # ← 0.5 CPU cores (500 millicores = 50% of 1 CPU)
|
||||
|
||||
# 🏥 Health Checks
|
||||
# These tell Kubernetes how to check if each pod is healthy
|
||||
# Like having a health monitor for each pod
|
||||
livenessProbe: # ← Checks if the pod is alive (restarts if failed)
|
||||
httpGet: # ← Use HTTP GET request to check health
|
||||
path: / # ← Check the root path of nginx
|
||||
port: 80 # ← Check on port 80
|
||||
initialDelaySeconds: 30 # ← Wait 30 seconds before first check
|
||||
periodSeconds: 10 # ← Check every 10 seconds
|
||||
timeoutSeconds: 5 # ← Fail if response takes longer than 5 seconds
|
||||
failureThreshold: 3 # ← Restart pod after 3 consecutive failures
|
||||
|
||||
readinessProbe: # ← Checks if the pod is ready to receive traffic
|
||||
httpGet: # ← Use HTTP GET request to check readiness
|
||||
path: / # ← Check the root path
|
||||
port: 80 # ← Check on port 80
|
||||
initialDelaySeconds: 5 # ← Wait 5 seconds before first check
|
||||
periodSeconds: 5 # ← Check every 5 seconds
|
||||
timeoutSeconds: 3 # ← Fail if response takes longer than 3 seconds
|
||||
failureThreshold: 3 # ← Mark as not ready after 3 consecutive failures
|
||||
|
||||
# 🔒 Container-Level Security Context
|
||||
# These settings make each container more secure
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false # ← Prevent gaining root privileges
|
||||
readOnlyRootFilesystem: true # ← Make root filesystem read-only
|
||||
capabilities: # ← Remove unnecessary Linux capabilities
|
||||
drop: # ← Drop these capabilities
|
||||
- ALL # ← Drop ALL capabilities (most restrictive)
|
||||
|
||||
# 📁 Volume Mounts
|
||||
# These allow the container to access files from the pod
|
||||
volumeMounts:
|
||||
- name: tmp-volume # ← Name of the volume to mount
|
||||
mountPath: /tmp # ← Where to mount it inside the container
|
||||
|
||||
# 💾 Volumes
|
||||
# These define storage that can be mounted into containers
|
||||
volumes:
|
||||
- name: tmp-volume # ← Volume name (matches volumeMounts above)
|
||||
emptyDir: {} # ← Empty directory volume (temporary)
|
||||
|
||||
# 🚀 How to use this:
|
||||
# kubectl apply -f deployment-example.yaml
|
||||
# kubectl get deployments # Check deployment status
|
||||
# kubectl get pods -l app=web-app # See all pods created by this deployment
|
||||
# kubectl scale deployment web-app --replicas=5 # Scale up to 5 replicas
|
||||
# kubectl set image deployment/web-app web-app=nginx:1.21 # Update to new version
|
||||
# kubectl rollout status deployment/web-app # Check rollout progress
|
||||
# kubectl rollout undo deployment/web-app # Rollback to previous version
|
||||
|
||||
# 📊 What happens when you apply this:
|
||||
# 1. Kubernetes creates 3 nginx pods
|
||||
# 2. Each pod runs nginx on port 80
|
||||
# 3. Health checks ensure pods are working
|
||||
# 4. If a pod fails, deployment automatically creates a new one
|
||||
# 5. Load balancer can send traffic to any of the 3 pods
|
||||
|
||||
# 📚 Learn more:
|
||||
# - Deployments: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/
|
||||
# - Rolling Updates: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#rolling-update-deployment
|
||||
# - Scaling: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#scaling-a-deployment
|
||||
# - Rollbacks: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#rolling-back-a-deployment
|
||||
265
docs/examples/ingress-example.yaml
Normal file
@ -0,0 +1,265 @@
|
||||
# Enhanced Ingress Example with Detailed Comments
|
||||
# This example shows how to expose your applications externally with SSL/TLS
|
||||
#
|
||||
# 🎯 What this does: Creates an Ingress that:
|
||||
# - Routes traffic from external domains to your services
|
||||
# - Automatically handles SSL/TLS certificates
|
||||
# - Provides load balancing across multiple pods
|
||||
# - Supports path-based routing (different URLs to different services)
|
||||
# - Includes security features like rate limiting and CORS
|
||||
|
||||
# 📊 ASCII Diagram: How Ingress Works in Your Cluster
|
||||
#
|
||||
# ┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
# │ INTERNET │
|
||||
# │ │
|
||||
# │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
|
||||
# │ │ Browser │ │ Mobile │ │ API │ │ Other │ │
|
||||
# │ │ │ │ App │ │ Client │ │ Clients │ │
|
||||
# │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │
|
||||
# │ │ │ │ │ │
|
||||
# │ └────────────────┼───────────────┼───────────────┘ │
|
||||
# │ │ │ │
|
||||
# │ ▼ ▼ │
|
||||
# │ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
# │ │ AZURE LOAD BALANCER │ │
|
||||
# │ │ IP: 4.155.160.32 (prod-usw2-k8s-freeleaps-lb-fe-ip) │ │
|
||||
# │ │ Port: 80/443 │ │
|
||||
# │ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
# │ │ │
|
||||
# │ ▼ │
|
||||
# │ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
# │ │ NGINX INGRESS CONTROLLER │ │
|
||||
# │ │ ┌─────────────────────────────────────────────────────────────┐ │ │
|
||||
# │ │ │ Pod: ingress-nginx-controller-abc123 │ │ │
|
||||
# │ │ │ IP: 10.0.1.100 Port: 80/443 │ │ │
|
||||
# │ │ └─────────────────────────────────────────────────────────────┘ │ │
|
||||
# │ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
# │ │ │
|
||||
# │ ▼ │
|
||||
# │ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
# │ │ INGRESS RULES │ │
|
||||
# │ │ │ │
|
||||
# │ │ argo.mathmast.com → argo-cd-server:80 │ │
|
||||
# │ │ gitea.freeleaps.mathmast.com → gitea-http:3000 │ │
|
||||
# │ │ magicleaps.mathmast.com → magicleaps-frontend-service:80 │ │
|
||||
# │ │ alpha.magicleaps.mathmast.com → magicleaps-frontend-service:80 │ │
|
||||
# │ │ │ │
|
||||
# │ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
# │ │ │
|
||||
# │ ▼ │
|
||||
# │ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
# │ │ KUBERNETES SERVICES │ │
|
||||
# │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||
# │ │ │argo-cd-svc │ │gitea-http │ │magic-front │ │magic-api │ │ │
|
||||
# │ │ │ClusterIP │ │ClusterIP │ │ClusterIP │ │ClusterIP │ │ │
|
||||
# │ │ │10.0.1.10 │ │10.0.1.11 │ │10.0.1.12 │ │10.0.1.13 │ │ │
|
||||
# │ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||
# │ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
# │ │ │
|
||||
# │ ▼ │
|
||||
# │ ┌─────────────────────────────────────────────────────────────────────┐ │
|
||||
# │ │ APPLICATION PODS │ │
|
||||
# │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||
# │ │ │argo-cd-pod │ │gitea-pod │ │magic-front │ │magic-api │ │ │
|
||||
# │ │ │10.0.1.101 │ │10.0.1.102 │ │10.0.1.103 │ │10.0.1.104 │ │ │
|
||||
# │ │ │argo-cd:v2.8 │ │gitea:1.20 │ │nginx:latest │ │api:v1.2 │ │ │
|
||||
# │ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ │
|
||||
# │ └─────────────────────────────────────────────────────────────────────┘ │
|
||||
# └─────────────────────────────────────────────────────────────────────────────┘
|
||||
#
|
||||
# 🔄 Request Flow Example:
|
||||
# 1. User visits: https://magicleaps.mathmast.com/
|
||||
# 2. DNS resolves to Azure Load Balancer IP (4.155.160.32)
|
||||
# 3. Load Balancer forwards to nginx-ingress-controller
|
||||
# 4. Ingress controller checks rules:
|
||||
# - Host: magicleaps.mathmast.com ✓
|
||||
# - Path: / matches /* prefix ✓
|
||||
# 5. Routes to magicleaps-frontend-service:80
|
||||
# 6. Service load balances to magicleaps-frontend pods
|
||||
# 7. Pod returns response through same path
|
||||
|
||||
# 🔐 SSL/TLS Certificate Flow:
|
||||
# ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
# │ Browser │ │ Ingress │ │cert-manager │ │Let's Encrypt│
|
||||
# │ │ │ Controller │ │ │ │ │
|
||||
# └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘
|
||||
# │ │ │ │
|
||||
# │ HTTPS Request │ │ │
|
||||
# │───────────────▶│ │ │
|
||||
# │ │ Check Cert │ │
|
||||
# │ │───────────────▶│ │
|
||||
# │ │ │ Request Cert │
|
||||
# │ │ │───────────────▶│
|
||||
# │ │ │ │ DNS Challenge
|
||||
# │ │ │ │◀───────────────│
|
||||
# │ │ │ Cert Ready │
|
||||
# │ │ │◀───────────────│
|
||||
# │ │ Cert Ready │ │
|
||||
# │ │◀───────────────│ │
|
||||
# │ HTTPS Response │ │ │
|
||||
# │◀───────────────│ │ │
|
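#
# A small sketch for checking where a certificate is in that flow (the resource
# names below are the ones used in the educational example further down):
#   kubectl get certificate -n my-app                     # READY should become True
#   kubectl describe certificate myapp-tls -n my-app      # shows orders/challenges if it is stuck
#   kubectl get secret myapp-tls -n my-app                # the issued certificate lands in this secret
#   echo | openssl s_client -connect myapp.example.com:443 -servername myapp.example.com 2>/dev/null | openssl x509 -noout -dates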
||||
|
||||
# 🏭 YOUR ACTUAL PRODUCTION SETUP
|
||||
#
|
||||
# Based on your codebase, here are your actual production values:
|
||||
#
|
||||
# Load Balancer IP: 4.155.160.32 (prod-usw2-k8s-freeleaps-lb-fe-ip)
|
||||
#
|
||||
# Current Applications:
|
||||
# 1. ArgoCD: argo.mathmast.com → argo-cd-server:80 (freeleaps-devops-system)
|
||||
# 2. Gitea: gitea.freeleaps.mathmast.com → gitea-http:3000 (freeleaps-prod)
|
||||
# 3. Magicleaps Prod: magicleaps.mathmast.com → magicleaps-frontend-service:80 (magicleaps)
|
||||
# 4. Magicleaps Alpha: alpha.magicleaps.mathmast.com → magicleaps-frontend-service:80 (magicleaps)
|
||||
#
|
||||
# Certificate Issuer: mathmast-dot-com (Let's Encrypt + GoDaddy DNS01)
|
||||
# Ingress Controller: nginx-ingress-controller:v1.12.0 (freeleaps-controls-system)
|
||||
#
|
||||
# Commands to check your actual setup:
|
||||
# kubectl get ingress --all-namespaces
|
||||
# kubectl get certificates --all-namespaces
|
||||
# kubectl get pods -n freeleaps-controls-system -l app.kubernetes.io/name=ingress-nginx
|
||||
# curl -I http://4.155.160.32
|
||||
# nslookup argo.mathmast.com
|
||||
# nslookup gitea.freeleaps.mathmast.com
|
||||
# nslookup magicleaps.mathmast.com
|
||||
|
||||
# 📚 EDUCATIONAL EXAMPLE BELOW
|
||||
# This is a generic example for learning purposes. Your actual setup is above.
|
||||
|
||||
# Ingress for external access and routing
|
||||
apiVersion: networking.k8s.io/v1 # ← Kubernetes API version for Ingress resources
|
||||
kind: Ingress # ← Resource type: Ingress (external access layer)
|
||||
metadata: # ← Metadata section: describes the ingress
|
||||
name: web-app-ingress # ← Unique name for this ingress
|
||||
namespace: my-app # ← Namespace where ingress will be created
|
||||
labels: # ← Labels for organizing and selecting ingresses
|
||||
app: web-app # ← Label: identifies this as a web application ingress
|
||||
environment: production # ← Label: environment this runs in
|
||||
annotations: # ← Annotations: configuration for ingress controller
|
||||
# 🔧 Nginx Ingress Controller Annotations
|
||||
# These tell the nginx-ingress-controller how to behave
|
||||
nginx.ingress.kubernetes.io/rewrite-target: / # ← Rewrite URL paths (remove /api prefix)
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true" # ← Redirect HTTP to HTTPS
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true" # ← Force HTTPS redirect
|
||||
|
||||
# 🔐 Cert-Manager Integration
|
||||
# This tells cert-manager to automatically get SSL certificates
|
||||
cert-manager.io/cluster-issuer: "letsencrypt-prod" # ← Use Let's Encrypt for certificates
|
||||
|
||||
# 🛡️ Rate Limiting
|
||||
# Prevent abuse by limiting requests per time window
|
||||
nginx.ingress.kubernetes.io/limit-rpm: "100"            # ← Allow roughly 100 requests per minute per client IP
# (ingress-nginx expresses the window through limit-rps / limit-rpm; there is no separate rate-limit-window annotation)
|
||||
|
||||
# 🌐 CORS (Cross-Origin Resource Sharing)
|
||||
# Allow web browsers to make requests from different domains
|
||||
nginx.ingress.kubernetes.io/enable-cors: "true" # ← Enable CORS
|
||||
nginx.ingress.kubernetes.io/cors-allow-origin: "*" # ← Allow all origins (customize for production)
|
||||
nginx.ingress.kubernetes.io/cors-allow-methods: "GET, POST, PUT, DELETE, OPTIONS" # ← Allowed HTTP methods
|
||||
nginx.ingress.kubernetes.io/cors-allow-headers: "DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization" # ← Allowed headers
|
||||
|
||||
# ⚡ Performance Optimizations
|
||||
# These improve performance for your applications
|
||||
nginx.ingress.kubernetes.io/proxy-body-size: "0" # ← No limit on request body size
|
||||
nginx.ingress.kubernetes.io/proxy-read-timeout: "600" # ← 10 minute read timeout
|
||||
nginx.ingress.kubernetes.io/proxy-send-timeout: "600" # ← 10 minute send timeout
|
||||
|
||||
spec: # ← Specification: defines routing rules
|
||||
# 🔐 TLS Configuration
|
||||
# This defines SSL/TLS certificates for secure HTTPS connections
|
||||
tls: # ← TLS (Transport Layer Security) configuration
|
||||
- hosts: # ← List of hostnames this certificate covers
|
||||
- myapp.example.com # ← Main application domain
|
||||
- api.myapp.example.com # ← API subdomain
|
||||
secretName: myapp-tls # ← Name of the secret containing the certificate
|
||||
# cert-manager will automatically create this secret with the SSL certificate
|
||||
|
||||
# 🛣️ Routing Rules
|
||||
# These define how traffic is routed to different services
|
||||
rules: # ← List of routing rules
|
||||
# Rule 1: Main application (myapp.example.com)
|
||||
- host: myapp.example.com # ← Hostname to match (like a domain name)
|
||||
http: # ← HTTP traffic configuration
|
||||
paths: # ← List of URL paths and their destinations
|
||||
# Path 1: Root path (/) → web application
|
||||
- path: / # ← URL path to match (root path)
|
||||
pathType: Prefix # ← How to match the path (Prefix = starts with)
|
||||
backend: # ← Where to send the traffic
|
||||
service: # ← Backend service configuration
|
||||
name: web-app-service # ← Service name to route to
|
||||
port: # ← Service port
|
||||
number: 80 # ← Port number (80 = HTTP)
|
||||
|
||||
# Path 2: Static files (/static) → static file server
|
||||
- path: /static # ← URL path to match (static files)
|
||||
pathType: Prefix # ← Match paths starting with /static
|
||||
backend: # ← Where to send the traffic
|
||||
service: # ← Backend service configuration
|
||||
name: static-service # ← Service name for static files
|
||||
port: # ← Service port
|
||||
number: 80 # ← Port number
|
||||
|
||||
# Rule 2: API subdomain (api.myapp.example.com)
|
||||
- host: api.myapp.example.com # ← Different hostname for API
|
||||
http: # ← HTTP traffic configuration
|
||||
paths: # ← List of URL paths and their destinations
|
||||
# Path 1: Root path (/) → main API service
|
||||
- path: / # ← URL path to match (root path)
|
||||
pathType: Prefix # ← How to match the path
|
||||
backend: # ← Where to send the traffic
|
||||
service: # ← Backend service configuration
|
||||
name: api-service # ← Service name for main API
|
||||
port: # ← Service port
|
||||
number: 8080 # ← Port number (8080 = common API port)
|
||||
|
||||
# Path 2: Version 1 API (/v1) → v1 API service
|
||||
- path: /v1 # ← URL path to match (v1 API)
|
||||
pathType: Prefix # ← Match paths starting with /v1
|
||||
backend: # ← Where to send the traffic
|
||||
service: # ← Backend service configuration
|
||||
name: api-v1-service # ← Service name for v1 API
|
||||
port: # ← Service port
|
||||
number: 8080 # ← Port number
|
||||
|
||||
# Path 3: Version 2 API (/v2) → v2 API service
|
||||
- path: /v2 # ← URL path to match (v2 API)
|
||||
pathType: Prefix # ← Match paths starting with /v2
|
||||
backend: # ← Where to send the traffic
|
||||
service: # ← Backend service configuration
|
||||
name: api-v2-service # ← Service name for v2 API
|
||||
port: # ← Service port
|
||||
number: 8080 # ← Port number
|
||||
|
||||
# 🚀 How to use this:
|
||||
# kubectl apply -f ingress-example.yaml
|
||||
# kubectl get ingress # Check ingress status
|
||||
# kubectl describe ingress web-app-ingress # See detailed ingress info
|
||||
# kubectl get certificates # Check SSL certificate status
|
||||
# curl -H "Host: myapp.example.com" http://your-cluster-ip/ # Test routing
|
||||
|
||||
# 📊 What happens when you apply this:
|
||||
# 1. Kubernetes creates the Ingress resource
|
||||
# 2. nginx-ingress-controller reads the Ingress and configures nginx
|
||||
# 3. cert-manager sees the cert-manager.io/cluster-issuer annotation
|
||||
# 4. cert-manager requests SSL certificate from Let's Encrypt
|
||||
# 5. Let's Encrypt validates domain ownership via DNS challenge
|
||||
# 6. Certificate is stored in Kubernetes secret
|
||||
# 7. nginx-ingress-controller uses the certificate for HTTPS
|
||||
# 8. Traffic flows: Internet → Load Balancer → nginx → Services → Pods
|
||||
|
||||
# 🔍 Your Current Setup Analysis:
|
||||
# Based on your codebase, you're using:
|
||||
# - nginx-ingress-controller in freeleaps-controls-system namespace
|
||||
# - cert-manager with Let's Encrypt for SSL certificates
|
||||
# - Custom ingress manager that automatically creates ingresses
|
||||
# - Annotations for SSL redirect, rate limiting, and CORS
|
||||
# - DNS-based certificate validation (DNS01 challenge)
|
||||
|
||||
# 📚 Learn more:
|
||||
# - Ingress: https://kubernetes.io/docs/concepts/services-networking/ingress/
|
||||
# - nginx-ingress: https://kubernetes.github.io/ingress-nginx/
|
||||
# - cert-manager: https://cert-manager.io/docs/
|
||||
# - SSL/TLS: https://kubernetes.io/docs/concepts/services-networking/ingress/#tls
|
||||
# - Rate Limiting: https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/annotations/#rate-limiting
|
||||
# - CORS: https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/annotations/#enable-cors
|
||||
162
docs/examples/job-cronjob-example.yaml
Normal file
@ -0,0 +1,162 @@
|
||||
# Job for one-time data processing
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: data-processing-job
|
||||
namespace: my-app
|
||||
labels:
|
||||
app: data-processor
|
||||
job-type: batch
|
||||
spec:
|
||||
completions: 3 # Run 3 times
|
||||
parallelism: 2 # Run 2 in parallel
|
||||
backoffLimit: 3 # Retry 3 times on failure
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: data-processor
|
||||
job-type: batch
|
||||
spec:
|
||||
restartPolicy: Never
|
||||
containers:
|
||||
- name: data-processor
|
||||
image: python:3.9-slim
|
||||
command: ["python", "process_data.py"]
|
||||
env:
|
||||
- name: INPUT_FILE
|
||||
value: "/data/input.csv"
|
||||
- name: OUTPUT_FILE
|
||||
value: "/data/output.csv"
|
||||
- name: DB_HOST
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: app-config
|
||||
key: DB_HOST
|
||||
- name: DB_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: db-secret
|
||||
key: DB_PASSWORD
|
||||
volumeMounts:
|
||||
- name: data-volume
|
||||
mountPath: /data
|
||||
- name: script-volume
|
||||
mountPath: /app
|
||||
resources:
|
||||
requests:
|
||||
memory: "128Mi"
|
||||
cpu: "250m"
|
||||
limits:
|
||||
memory: "256Mi"
|
||||
cpu: "500m"
|
||||
volumes:
|
||||
- name: data-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: data-pvc
|
||||
- name: script-volume
|
||||
configMap:
|
||||
name: app-config
|
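# A sketch of running and inspecting this Job (process_data.py, data-pvc and
# app-config are assumed to exist already):
#   kubectl apply -f job-cronjob-example.yaml
#   kubectl get jobs -n my-app                                  # COMPLETIONS should reach 3/3
#   kubectl get pods -n my-app -l job-name=data-processing-job
#   kubectl logs -n my-app -l job-name=data-processing-job --tail=50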
||||
---
|
||||
# CronJob for scheduled tasks
|
||||
apiVersion: batch/v1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: daily-backup
|
||||
namespace: my-app
|
||||
labels:
|
||||
app: backup
|
||||
job-type: scheduled
|
||||
spec:
|
||||
schedule: "0 2 * * *" # Daily at 2 AM
|
||||
concurrencyPolicy: Forbid # Don't run if previous job is still running
|
||||
successfulJobsHistoryLimit: 3 # Keep 3 successful job histories
|
||||
failedJobsHistoryLimit: 1 # Keep 1 failed job history
|
||||
jobTemplate:
|
||||
spec:
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: backup
|
||||
job-type: scheduled
|
||||
spec:
|
||||
restartPolicy: OnFailure
|
||||
containers:
|
||||
- name: backup
|
||||
image: postgres:13
|
||||
command: ["/bin/bash", "-c"]
|
||||
args:
|
||||
- |
|
||||
echo "Starting backup at $(date)"
|
||||
pg_dump -h "$PGHOST" -U "$PGUSER" -d "$PGDATABASE" > /backup/backup-$(date +%Y%m%d).sql
|
||||
echo "Backup completed at $(date)"
|
||||
echo "Backup file size: $(ls -lh /backup/backup-$(date +%Y%m%d).sql)"
|
||||
env:
|
||||
- name: PGHOST
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
name: app-config
|
||||
key: DB_HOST
|
||||
- name: PGUSER
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: db-secret
|
||||
key: DB_USERNAME
|
||||
- name: PGPASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: db-secret
|
||||
key: DB_PASSWORD
|
||||
- name: PGDATABASE
|
||||
value: "myapp"
|
||||
volumeMounts:
|
||||
- name: backup-volume
|
||||
mountPath: /backup
|
||||
resources:
|
||||
requests:
|
||||
memory: "256Mi"
|
||||
cpu: "250m"
|
||||
limits:
|
||||
memory: "512Mi"
|
||||
cpu: "500m"
|
||||
volumes:
|
||||
- name: backup-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: backup-pvc
|
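# To exercise the backup without waiting for 2 AM, a one-off Job can be created
# from this CronJob (a sketch; the job name is arbitrary):
#   kubectl create job manual-backup --from=cronjob/daily-backup -n my-app
#   kubectl get jobs -n my-app
#   kubectl logs -n my-app job/manual-backup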
||||
---
|
||||
# CronJob for cleanup tasks
|
||||
apiVersion: batch/v1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: cleanup-logs
|
||||
namespace: my-app
|
||||
labels:
|
||||
app: cleanup
|
||||
job-type: maintenance
|
||||
spec:
|
||||
schedule: "0 3 * * 0" # Weekly on Sunday at 3 AM
|
||||
concurrencyPolicy: Allow
|
||||
jobTemplate:
|
||||
spec:
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: cleanup
|
||||
job-type: maintenance
|
||||
spec:
|
||||
restartPolicy: OnFailure
|
||||
containers:
|
||||
- name: cleanup
|
||||
image: alpine:latest
|
||||
command: ["/bin/sh", "-c"]
|
||||
args:
|
||||
- |
|
||||
echo "Starting log cleanup at $(date)"
|
||||
find /logs -name "*.log" -mtime +7 -delete
|
||||
echo "Cleanup completed at $(date)"
|
||||
volumeMounts:
|
||||
- name: logs-volume
|
||||
mountPath: /logs
|
||||
volumes:
|
||||
- name: logs-volume
|
||||
persistentVolumeClaim:
|
||||
claimName: logs-pvc
|
||||
381
docs/examples/kubectl-quick-reference.md
Normal file
@ -0,0 +1,381 @@
|
||||
# kubectl Quick Reference Guide
|
||||
|
||||
## 🚀 **Essential Commands for Junior Engineers**
|
||||
|
||||
### **Basic Resource Management**
|
||||
|
||||
```bash
|
||||
# Get resources
|
||||
kubectl get pods
|
||||
kubectl get deployments
|
||||
kubectl get services
|
||||
kubectl get namespaces
|
||||
kubectl get configmaps
|
||||
kubectl get secrets
|
||||
kubectl get pvc
|
||||
kubectl get ingress
|
||||
|
||||
# Get all resources in namespace
|
||||
kubectl get all -n <namespace>
|
||||
|
||||
# Get resources with labels
|
||||
kubectl get pods -l app=web-app
|
||||
kubectl get pods -l environment=production
|
||||
|
||||
# Get resources in wide format
|
||||
kubectl get pods -o wide
|
||||
kubectl get nodes -o wide
|
||||
```
|
||||
|
||||
### **Resource Creation**
|
||||
|
||||
```bash
|
||||
# Create from YAML file
|
||||
kubectl apply -f <file.yaml>
|
||||
|
||||
# Create from directory
|
||||
kubectl apply -f <directory>/
|
||||
|
||||
# Create from URL
|
||||
kubectl apply -f https://raw.githubusercontent.com/...
|
||||
|
||||
# Create resources directly
|
||||
kubectl create namespace my-app
|
||||
kubectl create deployment nginx --image=nginx:latest
|
||||
kubectl create service clusterip nginx --tcp=80:80
|
||||
kubectl create configmap app-config --from-literal=DB_HOST=postgres
|
||||
kubectl create secret generic db-secret --from-literal=DB_PASSWORD=secret123
|
||||
```
|
||||
|
||||
### **Resource Inspection**
|
||||
|
||||
```bash
|
||||
# Describe resources
|
||||
kubectl describe pod <pod-name>
|
||||
kubectl describe deployment <deployment-name>
|
||||
kubectl describe service <service-name>
|
||||
kubectl describe namespace <namespace-name>
|
||||
|
||||
# Get resource YAML
|
||||
kubectl get pod <pod-name> -o yaml
|
||||
kubectl get deployment <deployment-name> -o yaml
|
||||
|
||||
# Get resource in specific format
|
||||
kubectl get pod <pod-name> -o json
|
||||
kubectl get pod <pod-name> -o jsonpath='{.spec.containers[0].image}'
|
||||
```
|
||||
|
||||
### **Logs and Debugging**
|
||||
|
||||
```bash
|
||||
# View logs
|
||||
kubectl logs <pod-name>
|
||||
kubectl logs <pod-name> -f # Follow logs
|
||||
kubectl logs <pod-name> --previous # Previous container
|
||||
kubectl logs <pod-name> --tail=100 # Last 100 lines
|
||||
|
||||
# Execute commands in pods
|
||||
kubectl exec -it <pod-name> -- /bin/bash
|
||||
kubectl exec <pod-name> -- ls /app
|
||||
kubectl exec <pod-name> -- cat /etc/passwd
|
||||
|
||||
# Port forwarding
|
||||
kubectl port-forward <pod-name> 8080:80
|
||||
kubectl port-forward service/<service-name> 8080:80
|
||||
kubectl port-forward deployment/<deployment-name> 8080:80
|
||||
```
|
||||
|
||||
### **Scaling and Updates**
|
||||
|
||||
```bash
|
||||
# Scale deployments
|
||||
kubectl scale deployment <deployment-name> --replicas=5
|
||||
kubectl scale deployment <deployment-name> --replicas=0 # Scale to zero
|
||||
|
||||
# Update deployments
|
||||
kubectl set image deployment/<deployment-name> <container-name>=<new-image>
|
||||
kubectl set image deployment/nginx nginx=nginx:1.21
|
||||
|
||||
# Rollout management
|
||||
kubectl rollout status deployment/<deployment-name>
|
||||
kubectl rollout history deployment/<deployment-name>
|
||||
kubectl rollout undo deployment/<deployment-name>
|
||||
kubectl rollout pause deployment/<deployment-name>
|
||||
kubectl rollout resume deployment/<deployment-name>
|
||||
```
|
||||
|
||||
### **Resource Deletion**
|
||||
|
||||
```bash
|
||||
# Delete resources
|
||||
kubectl delete pod <pod-name>
|
||||
kubectl delete deployment <deployment-name>
|
||||
kubectl delete service <service-name>
|
||||
kubectl delete namespace <namespace-name>
|
||||
|
||||
# Delete from YAML file
|
||||
kubectl delete -f <file.yaml>
|
||||
|
||||
# Delete all resources in namespace
|
||||
kubectl delete all --all -n <namespace>
|
||||
|
||||
# Force delete (use with caution)
|
||||
kubectl delete pod <pod-name> --force --grace-period=0
|
||||
```
|
||||
|
||||
### **Context and Namespace Management**
|
||||
|
||||
```bash
|
||||
# View current context
|
||||
kubectl config current-context
|
||||
|
||||
# List contexts
|
||||
kubectl config get-contexts
|
||||
|
||||
# Switch context
|
||||
kubectl config use-context <context-name>
|
||||
|
||||
# Set default namespace
|
||||
kubectl config set-context --current --namespace=<namespace>
|
||||
|
||||
# View cluster info
|
||||
kubectl cluster-info
|
||||
kubectl cluster-info dump
|
||||
```
|
||||
|
||||
### **Resource Monitoring**
|
||||
|
||||
```bash
|
||||
# Check resource usage
|
||||
kubectl top pods
|
||||
kubectl top nodes
|
||||
kubectl top pods --containers
|
||||
|
||||
# Check events
|
||||
kubectl get events
|
||||
kubectl get events -n <namespace>
|
||||
kubectl get events --sort-by='.lastTimestamp'
|
||||
|
||||
# Check resource quotas
|
||||
kubectl get resourcequota
|
||||
kubectl describe resourcequota <quota-name>
|
||||
```
|
||||
|
||||
### **Troubleshooting Commands**
|
||||
|
||||
```bash
|
||||
# Check node status
|
||||
kubectl get nodes
|
||||
kubectl describe node <node-name>
|
||||
|
||||
# Check service endpoints
|
||||
kubectl get endpoints <service-name>
|
||||
kubectl describe endpoints <service-name>
|
||||
|
||||
# Check persistent volumes
|
||||
kubectl get pv
|
||||
kubectl get pvc
|
||||
kubectl describe pv <pv-name>
|
||||
|
||||
# Check ingress
|
||||
kubectl get ingress
|
||||
kubectl describe ingress <ingress-name>
|
||||
|
||||
# Check jobs and cronjobs
|
||||
kubectl get jobs
|
||||
kubectl get cronjobs
|
||||
kubectl describe job <job-name>
|
||||
kubectl describe cronjob <cronjob-name>
|
||||
```
|
||||
|
||||
### **Useful Aliases**
|
||||
|
||||
```bash
|
||||
# Add to your .bashrc or .zshrc
|
||||
alias k='kubectl'
|
||||
alias kg='kubectl get'
|
||||
alias kd='kubectl describe'
|
||||
alias kl='kubectl logs'
|
||||
alias ke='kubectl exec -it'
|
||||
alias kp='kubectl port-forward'
|
||||
alias ka='kubectl apply -f'
|
||||
alias kdel='kubectl delete'
|
||||
alias kctx='kubectl config use-context'
|
||||
alias kns='kubectl config set-context --current --namespace'
|
||||
```
|
||||
|
||||
### **Common Patterns**
|
||||
|
||||
```bash
|
||||
# Get all pods with their IPs
|
||||
kubectl get pods -o wide
|
||||
|
||||
# Get all services with their endpoints
|
||||
kubectl get services -o wide
|
||||
|
||||
# Get all resources in a namespace
|
||||
kubectl get all -n <namespace>
|
||||
|
||||
# Get resources by label
|
||||
kubectl get pods -l app=web-app,environment=production
|
||||
|
||||
# Get resources sorted by creation time
|
||||
kubectl get pods --sort-by=.metadata.creationTimestamp
|
||||
|
||||
# Get resources in custom columns
|
||||
kubectl get pods -o custom-columns=NAME:.metadata.name,STATUS:.status.phase,AGE:.metadata.creationTimestamp
|
||||
```
|
||||
|
||||
### **Advanced Commands**
|
||||
|
||||
```bash
|
||||
# Patch resources
|
||||
kubectl patch deployment <deployment-name> -p '{"spec":{"replicas":5}}'
|
||||
|
||||
# Edit resources
|
||||
kubectl edit deployment <deployment-name>
|
||||
kubectl edit configmap <configmap-name>
|
||||
|
||||
# Copy files
|
||||
kubectl cp <local-file> <pod-name>:/path/in/pod
|
||||
kubectl cp <pod-name>:/path/in/pod <local-file>
|
||||
|
||||
# Run temporary pods
|
||||
kubectl run test-pod --image=busybox --rm -it --restart=Never -- wget -O- <service-name>:<port>
|
||||
|
||||
# Check API resources
|
||||
kubectl api-resources
|
||||
kubectl explain <resource-type>
|
||||
```
|
||||
|
||||
### **Context-Specific Commands**
|
||||
|
||||
```bash
|
||||
# For debugging network issues
|
||||
kubectl run test-pod --image=busybox --rm -it --restart=Never -- wget -O- <service-name>:<port>
|
||||
|
||||
# For checking storage
|
||||
kubectl run test-pod --image=busybox --rm -it --restart=Never -- ls /data
|
||||
|
||||
# For testing DNS
|
||||
kubectl run test-pod --image=busybox --rm -it --restart=Never -- nslookup <service-name>
|
||||
|
||||
# For checking secrets
|
||||
kubectl exec <pod-name> -- env | grep DB_   # inspect secret-backed env vars on an existing pod
|
||||
```
|
||||
|
||||
## ⚠️ **Bad Practices to Avoid**
|
||||
|
||||
### **❌ DON'T DO THIS**
|
||||
|
||||
```bash
|
||||
# ❌ NEVER use kubectl run for production applications
|
||||
kubectl run my-app --image=my-app:latest --port=8080
|
||||
|
||||
# ❌ NEVER create standalone Pods for services
|
||||
kubectl run database --image=postgres:13 --port=5432
|
||||
|
||||
# ❌ NEVER use imperative commands for production
|
||||
kubectl run nginx --image=nginx:latest
|
||||
|
||||
# ❌ NEVER delete Pods directly (they'll be recreated by Deployment)
|
||||
kubectl delete pod <pod-name>
|
||||
|
||||
# ❌ NEVER use --force without understanding the consequences
|
||||
kubectl delete pod <pod-name> --force --grace-period=0
|
||||
```
|
||||
|
||||
### **✅ DO THIS INSTEAD**
|
||||
|
||||
```bash
|
||||
# ✅ Use Deployments for applications
|
||||
kubectl create deployment my-app --image=my-app:latest
|
||||
|
||||
# ✅ Use Helm charts for complex applications
|
||||
helm install my-app ./my-app-chart --namespace my-app
|
||||
|
||||
# ✅ Use kubectl apply for declarative deployments
|
||||
kubectl apply -f deployment.yaml
|
||||
|
||||
# ✅ Use StatefulSets for databases
|
||||
kubectl apply -f statefulset.yaml
|
||||
|
||||
# ✅ Delete Deployments, not Pods
|
||||
kubectl delete deployment <deployment-name>
|
||||
|
||||
# ✅ Use proper resource management
|
||||
kubectl scale deployment <deployment-name> --replicas=0
|
||||
```
|
||||
|
||||
### **🔧 When `kubectl run` is Acceptable**
|
||||
|
||||
```bash
|
||||
# ✅ OK: One-time debugging pods
|
||||
kubectl run debug-pod --image=busybox --rm -it --restart=Never -- nslookup my-service
|
||||
|
||||
# ✅ OK: Temporary testing
|
||||
kubectl run test-pod --image=nginx --rm -it --restart=Never -- curl http://my-service:80
|
||||
|
||||
# ✅ OK: Quick experiments (development only)
|
||||
kubectl run temp-pod --image=nginx --port=80
|
||||
|
||||
# ✅ OK: Troubleshooting network issues
|
||||
kubectl run test-pod --image=busybox --rm -it --restart=Never -- wget -O- my-service:80
|
||||
```
|
||||
|
||||
## 🏭 **Your Codebase Best Practices**
|
||||
|
||||
### **Your Actual Commands**
|
||||
```bash
|
||||
# 🏭 REAL COMMANDS FROM YOUR CODEBASE
|
||||
# From freeleaps-devops-reconciler/scripts/deploy.sh
|
||||
|
||||
# Helm deployment (primary method)
|
||||
helm upgrade --install "$RELEASE_NAME" . \
|
||||
--namespace "$NAMESPACE" \
|
||||
--create-namespace \
|
||||
-f "$VALUES_FILE" \
|
||||
--set "image.tag=$IMAGE_TAG"
|
||||
|
||||
# kubectl apply (secondary method)
|
||||
kubectl apply -f <directory>/
|
||||
|
||||
# Status checking
|
||||
kubectl get pods -n "$NAMESPACE" -l "app.kubernetes.io/name=freeleaps-devops-reconciler"
|
||||
kubectl logs -n "$NAMESPACE" deployment/"$RELEASE_NAME"
|
||||
```
|
||||
|
||||
### **Best Practices**
|
||||
|
||||
1. **Always use namespaces** to organize resources
|
||||
2. **Use labels** for better resource management
|
||||
3. **Set resource limits** on all containers
|
||||
4. **Use health checks** for reliability
|
||||
5. **Use ConfigMaps and Secrets** for configuration
|
||||
6. **Test changes** in a staging environment first
|
||||
7. **Keep kubectl updated** to match your cluster version
|
||||
8. **Use Deployments, not standalone Pods**
|
||||
9. **Use Helm charts for complex applications**
|
||||
10. **Use declarative YAML files** (see the sketch below)
|
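A minimal sketch of what several of these practices look like together in one declarative workflow (the file and resource names here are hypothetical):

```bash
# Everything lives in YAML under version control and is applied declaratively
kubectl apply -f namespace.yaml          # namespace with environment/team labels
kubectl apply -f configmap.yaml -f secret.yaml
kubectl apply -f deployment.yaml         # Deployment with resource limits and probes

# Verify by label instead of raw pod names
kubectl get pods -n my-app -l app=web-app,environment=production
kubectl rollout status deployment/web-app -n my-app
```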
||||
|
||||
### **Common Mistakes to Avoid**
|
||||
|
||||
```bash
|
||||
# ❌ Don't do this
|
||||
kubectl run nginx --image=nginx # Creates a pod, not a deployment
|
||||
|
||||
# ✅ Do this instead
|
||||
kubectl create deployment nginx --image=nginx
|
||||
|
||||
# ❌ Don't do this
|
||||
kubectl delete pod <pod-name> # Pod will be recreated by deployment
|
||||
|
||||
# ✅ Do this instead
|
||||
kubectl delete deployment <deployment-name>
|
||||
|
||||
# ❌ Don't do this
|
||||
kubectl exec <pod-name> -- rm -rf / # Dangerous command
|
||||
|
||||
# ✅ Do this instead
|
||||
kubectl exec <pod-name> -- ls / # Safe inspection command
|
||||
```
|
||||
44
docs/examples/namespace-with-pvc.yaml
Normal file
@ -0,0 +1,44 @@
|
||||
# Create namespace
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: my-app
|
||||
labels:
|
||||
environment: development
|
||||
team: backend
|
||||
---
|
||||
# Create PVC
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: app-storage
|
||||
namespace: my-app
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 5Gi
|
||||
storageClassName: managed-premium
|
||||
---
|
||||
# Create pod with PVC
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: app-with-storage
|
||||
namespace: my-app
|
||||
labels:
|
||||
app: my-app
|
||||
spec:
|
||||
containers:
|
||||
- name: app
|
||||
image: nginx:latest
|
||||
ports:
|
||||
- containerPort: 80
|
||||
volumeMounts:
|
||||
- name: app-storage
|
||||
mountPath: /app/data
|
||||
volumes:
|
||||
- name: app-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: app-storage
|
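# A sketch for applying this and confirming the claim binds (the storage class
# "managed-premium" is assumed to exist; substitute whatever
# `kubectl get storageclass` reports if it does not):
#   kubectl apply -f namespace-with-pvc.yaml
#   kubectl get pvc -n my-app                    # STATUS should move from Pending to Bound
#   kubectl describe pvc app-storage -n my-app
#   kubectl exec -n my-app app-with-storage -- df -h /app/data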
||||
150
docs/examples/resource-management-example.yaml
Normal file
@ -0,0 +1,150 @@
|
||||
# Namespace with Resource Quota
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: production
|
||||
labels:
|
||||
environment: production
|
||||
team: platform
|
||||
---
|
||||
# Resource Quota for the namespace
|
||||
apiVersion: v1
|
||||
kind: ResourceQuota
|
||||
metadata:
|
||||
name: production-quota
|
||||
namespace: production
|
||||
spec:
|
||||
hard:
|
||||
# CPU and Memory limits
|
||||
requests.cpu: "8" # 8 CPU cores total
|
||||
requests.memory: 16Gi # 16GB memory total
|
||||
limits.cpu: "16" # 16 CPU cores max
|
||||
limits.memory: 32Gi # 32GB memory max
|
||||
|
||||
# Resource counts
|
||||
pods: "50" # 50 pods max
|
||||
services: "20" # 20 services max
|
||||
persistentvolumeclaims: "20" # 20 PVCs max
|
||||
configmaps: "50" # 50 ConfigMaps max
|
||||
secrets: "50" # 50 Secrets max
|
||||
|
||||
# Storage
|
||||
requests.storage: 100Gi # 100GB storage total
|
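# A sketch of how to watch this quota being consumed once workloads land in the
# namespace:
#   kubectl describe resourcequota production-quota -n production
#   kubectl get resourcequota production-quota -n production -o yaml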
||||
---
|
||||
# Limit Range for default limits
|
||||
apiVersion: v1
|
||||
kind: LimitRange
|
||||
metadata:
|
||||
name: production-limits
|
||||
namespace: production
|
||||
spec:
|
||||
limits:
|
||||
# Default limits for containers
|
||||
- default:
|
||||
memory: 512Mi
|
||||
cpu: 500m
|
||||
defaultRequest:
|
||||
memory: 256Mi
|
||||
cpu: 250m
|
||||
type: Container
|
||||
# Default limits for pods
|
||||
- default:
|
||||
memory: 1Gi
|
||||
cpu: 1000m
|
||||
type: Pod
|
||||
---
|
||||
# Deployment with proper resource management
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: resource-managed-app
|
||||
namespace: production
|
||||
labels:
|
||||
app: resource-managed-app
|
||||
environment: production
|
||||
spec:
|
||||
replicas: 3
|
||||
selector:
|
||||
matchLabels:
|
||||
app: resource-managed-app
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: resource-managed-app
|
||||
environment: production
|
||||
spec:
|
||||
# Pod-level security context
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
fsGroup: 2000
|
||||
containers:
|
||||
- name: app
|
||||
image: nginx:latest
|
||||
ports:
|
||||
- containerPort: 80
|
||||
# Resource requests and limits
|
||||
resources:
|
||||
requests:
|
||||
memory: "256Mi" # Minimum guaranteed
|
||||
cpu: "250m" # 0.25 CPU cores
|
||||
limits:
|
||||
memory: "512Mi" # Maximum allowed
|
||||
cpu: "500m" # 0.5 CPU cores
|
||||
# Health checks
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: 80
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 3
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: 80
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 3
|
||||
failureThreshold: 3
|
||||
# Container-level security context
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
volumeMounts:
|
||||
- name: tmp-volume
|
||||
mountPath: /tmp
|
||||
volumes:
|
||||
- name: tmp-volume
|
||||
emptyDir: {}
|
||||
---
|
||||
# Horizontal Pod Autoscaler (HPA)
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: app-hpa
|
||||
namespace: production
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: resource-managed-app
|
||||
minReplicas: 3
|
||||
maxReplicas: 10
|
||||
metrics:
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 70
|
||||
- type: Resource
|
||||
resource:
|
||||
name: memory
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 80
|
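# A sketch for watching the autoscaler (the HPA only reports real TARGETS once a
# metrics-server is running in the cluster):
#   kubectl apply -f resource-management-example.yaml
#   kubectl get hpa app-hpa -n production -w
#   kubectl describe hpa app-hpa -n production
#   kubectl top pods -n production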
||||
54
docs/examples/service-example.yaml
Normal file
@ -0,0 +1,54 @@
|
||||
# ClusterIP Service (Internal Access)
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: web-app-service
|
||||
namespace: my-app
|
||||
labels:
|
||||
app: web-app
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: web-app
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
||||
---
|
||||
# NodePort Service (External Access via Node)
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: web-app-nodeport
|
||||
namespace: my-app
|
||||
labels:
|
||||
app: web-app
|
||||
spec:
|
||||
type: NodePort
|
||||
selector:
|
||||
app: web-app
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
targetPort: 80
|
||||
nodePort: 30080
|
||||
protocol: TCP
|
||||
---
|
||||
# LoadBalancer Service (Cloud Load Balancer)
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: web-app-lb
|
||||
namespace: my-app
|
||||
labels:
|
||||
app: web-app
|
||||
spec:
|
||||
type: LoadBalancer
|
||||
selector:
|
||||
app: web-app
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
targetPort: 80
|
||||
protocol: TCP
|
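# A sketch for reaching each Service type (node IPs and the external IP are
# environment-specific, and LoadBalancer assumes the cluster can provision one):
#   kubectl apply -f service-example.yaml
#   kubectl port-forward -n my-app service/web-app-service 8080:80   # ClusterIP: local tunnel
#   curl http://<node-ip>:30080                                      # NodePort: any node on port 30080
#   kubectl get svc web-app-lb -n my-app -w                          # LoadBalancer: wait for EXTERNAL-IP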
||||
190
docs/node_config.env.template
Normal file
@ -0,0 +1,190 @@
|
||||
# Azure Kubernetes Node Addition Configuration Template
|
||||
# Copy this file to node_config.env and update the values
|
||||
|
||||
# =============================================================================
|
||||
# VM Configuration
|
||||
# =============================================================================
|
||||
|
||||
# VM Name (as it appears in Azure)
|
||||
VM_NAME="prod-usw2-k8s-freeleaps-worker-nodes-06"
|
||||
|
||||
# Azure Resource Group containing the VM
|
||||
RESOURCE_GROUP="k8s"
|
||||
|
||||
# Node Type: worker or master
|
||||
NODE_TYPE="worker"
|
||||
|
||||
# =============================================================================
|
||||
# Authentication Configuration
|
||||
# =============================================================================
|
||||
|
||||
# Ansible user for SSH connections
|
||||
# Note: This should be wwwadmin@mathmast.com for your environment
|
||||
ANSIBLE_USER="wwwadmin@mathmast.com"
|
||||
|
||||
# SSH Password (will be prompted during execution)
|
||||
# Leave empty to be prompted during script execution
|
||||
SSH_PASSWORD=""
|
||||
|
||||
# Sudo Password (will be prompted during execution)
|
||||
# Leave empty to be prompted during script execution
|
||||
SUDO_PASSWORD=""
|
||||
|
||||
# =============================================================================
|
||||
# Network Configuration
|
||||
# =============================================================================
|
||||
|
||||
# VM Private IP Address (will be auto-detected if left empty)
|
||||
# Leave empty to auto-detect from Azure
|
||||
VM_PRIVATE_IP=""
|
||||
|
||||
# Network Security Group name (for troubleshooting)
|
||||
NSG_NAME="k8s-nsg"
|
||||
|
||||
# Subnet name (for troubleshooting)
|
||||
SUBNET_NAME="k8s-subnet"
|
||||
|
||||
# =============================================================================
|
||||
# Kubernetes Configuration
|
||||
# =============================================================================
|
||||
|
||||
# Kubernetes cluster name
|
||||
CLUSTER_NAME="freeleaps"
|
||||
|
||||
# Kubernetes version (should match existing cluster)
|
||||
KUBERNETES_VERSION="1.31.4"
|
||||
|
||||
# Container runtime (should match existing cluster)
|
||||
CONTAINER_RUNTIME="docker"
|
||||
|
||||
# =============================================================================
|
||||
# Paths and Directories
|
||||
# =============================================================================
|
||||
|
||||
# Path to inventory file
|
||||
INVENTORY_FILE="freeleaps-ops/cluster/ansible/manifests/inventory.ini"
|
||||
|
||||
# Path to kubespray directory
|
||||
KUBESPRAY_DIR="freeleaps-ops/3rd/kubespray"
|
||||
|
||||
# Path to group_vars directory
|
||||
GROUP_VARS_DIR="freeleaps-ops/cluster/ansible/manifests/group_vars"
|
||||
|
||||
# =============================================================================
|
||||
# Script Behavior Configuration
|
||||
# =============================================================================
|
||||
|
||||
# Enable verbose output (true/false)
|
||||
VERBOSE="false"
|
||||
|
||||
# Enable dry run mode (true/false)
|
||||
# When enabled, script will show what it would do without making changes
|
||||
DRY_RUN="false"
|
||||
|
||||
# Maximum wait time for node to appear (seconds)
|
||||
MAX_WAIT_TIME="300"
|
||||
|
||||
# Maximum wait time for node to be ready (seconds)
|
||||
MAX_READY_WAIT_TIME="600"
|
||||
|
||||
# =============================================================================
|
||||
# Backup and Recovery Configuration
|
||||
# =============================================================================
|
||||
|
||||
# Enable automatic backup of inventory file (true/false)
|
||||
ENABLE_BACKUP="true"
|
||||
|
||||
# Number of backup files to keep
|
||||
BACKUP_RETENTION="5"
|
||||
|
||||
# Backup directory
|
||||
BACKUP_DIR="./backups"
|
||||
|
||||
# =============================================================================
|
||||
# Monitoring and Alerting Configuration
|
||||
# =============================================================================
|
||||
|
||||
# Enable post-addition health checks (true/false)
|
||||
ENABLE_HEALTH_CHECKS="true"
|
||||
|
||||
# Enable pod scheduling test (true/false)
|
||||
ENABLE_POD_TEST="true"
|
||||
|
||||
# Test pod image
|
||||
TEST_POD_IMAGE="nginx:latest"
|
||||

# =============================================================================
# Troubleshooting Configuration
# =============================================================================

# Enable detailed logging (true/false)
ENABLE_LOGGING="true"

# Log file path
LOG_FILE="./node_addition.log"

# Enable SSH connection testing (true/false)
ENABLE_SSH_TEST="true"

# SSH timeout (seconds)
SSH_TIMEOUT="10"

# =============================================================================
# Advanced Configuration
# =============================================================================

# Ansible playbook timeout (seconds)
ANSIBLE_TIMEOUT="3600"

# Kubectl timeout (seconds)
KUBECTL_TIMEOUT="300"

# Azure CLI timeout (seconds)
AZURE_TIMEOUT="300"

# =============================================================================
# Validation Rules
# =============================================================================

# Allowed node types
ALLOWED_NODE_TYPES="worker,master"

# Required VM name pattern
VM_NAME_PATTERN="^[a-zA-Z0-9-]+$"

# Required resource group pattern
RESOURCE_GROUP_PATTERN="^[a-zA-Z0-9-]+$"
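
# NOTE: Illustrative sketch only, to make the intent of the patterns above concrete.
# VM_NAME and RESOURCE_GROUP are assumed to be set earlier in this file; bash matches
# a regex held in an unquoted variable:
#   [[ "$VM_NAME" =~ $VM_NAME_PATTERN ]] || { echo "Invalid VM name: $VM_NAME"; exit 1; }
#   [[ "$RESOURCE_GROUP" =~ $RESOURCE_GROUP_PATTERN ]] || { echo "Invalid resource group: $RESOURCE_GROUP"; exit 1; }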

# =============================================================================
# Notes and Instructions
# =============================================================================

# IMPORTANT NOTES:
# 1. This template should be copied to node_config.env before use
# 2. Update the values according to your environment
# 3. Passwords will be prompted during execution for security
# 4. The script will create backups automatically
# 5. All paths are relative to the script execution directory

# USAGE:
# 1. Copy this template: cp node_config.env.template node_config.env
# 2. Edit the configuration: vim node_config.env
# 3. Run the script: ./add_k8s_node.sh

# SECURITY NOTES:
# - Never commit passwords to version control
# - Use SSH keys when possible
# - Regularly rotate passwords
# - Monitor access logs

# TROUBLESHOOTING:
# - Check VM power state in Azure
# - Verify network security group rules
# - Ensure SSH service is running on VM
# - Check firewall rules if applicable
# - Verify DNS resolution
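
# EXAMPLE COMMANDS for the checks above (illustrative only; placeholders in angle
# brackets must be replaced with values from your environment):
#   az vm show -d -g <RESOURCE_GROUP> -n <VM_NAME> --query powerState -o tsv          # VM power state
#   az network nsg rule list -g <RESOURCE_GROUP> --nsg-name "$NSG_NAME" -o table      # NSG rules
#   ssh -o ConnectTimeout="$SSH_TIMEOUT" wwwadmin@mathmast.com@<VM_PRIVATE_IP> 'systemctl is-active sshd || systemctl is-active ssh'
#   nslookup <EXISTING_MASTER_HOSTNAME>                                               # DNS resolution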

# SUPPORT:
# - Infrastructure Team: [Contact Information]
# - Kubernetes Administrators: [Contact Information]
# - Azure Support: [Contact Information]
@ -1,9 +1,9 @@
|
||||
replicaCount: 1
|
||||
reconciler:
|
||||
image:
|
||||
repository: freeleaps/reconciler
|
||||
repository: null
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "snapshot-29b6c88"
|
||||
tag: snapshot-22a1169
|
||||
registry: docker.io
|
||||
name: reconciler
|
||||
imagePullSecrets: []
|
||||
|
||||
@ -16,7 +16,7 @@ chat:
|
||||
registry: docker.io
|
||||
repository: null
|
||||
name: chat
|
||||
tag: snapshot-b2c4e72
|
||||
tag: snapshot-29f543c
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: http
|
||||
|
||||
@ -10,7 +10,7 @@ devops:
|
||||
registry: docker.io
|
||||
repository: null
|
||||
name: devops
|
||||
tag: snapshot-e16c1dc
|
||||
tag: snapshot-45b4ebd
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: http
|
||||
@ -37,23 +37,23 @@ devops:
|
||||
interval: 30s
|
||||
scrapeTimeout: ''
|
||||
ingresses:
|
||||
- name: devops-ingress
|
||||
host: devops.freeleaps-alpha.com
|
||||
class: nginx
|
||||
tls:
|
||||
exists: false
|
||||
issuerRef:
|
||||
name: freeleaps-alpha-dot-com
|
||||
kind: ClusterIssuer
|
||||
name: devops.freeleaps-alpha.com-cert
|
||||
rules:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: devops-service
|
||||
port:
|
||||
number: 8014
|
||||
- name: devops-ingress
|
||||
host: devops.freeleaps-alpha.com
|
||||
class: nginx
|
||||
tls:
|
||||
exists: false
|
||||
issuerRef:
|
||||
name: freeleaps-alpha-dot-com
|
||||
kind: ClusterIssuer
|
||||
name: devops.freeleaps-alpha.com-cert
|
||||
rules:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: devops-service
|
||||
port:
|
||||
number: 8014
|
||||
configs:
|
||||
tz: UTC
|
||||
appName: devops
|
||||
@ -86,18 +86,18 @@ devops:
|
||||
kind: FreeleapsSecretStore
|
||||
name: freeleaps-main-secret-store
|
||||
target:
|
||||
name: "freeleaps-devops-alpha-secrets"
|
||||
creationPolicy: "Owner"
|
||||
name: freeleaps-devops-alpha-secrets
|
||||
creationPolicy: Owner
|
||||
refreshInterval: 30s
|
||||
data:
|
||||
- key: appMongodbUri
|
||||
remoteRef:
|
||||
key: "freeleaps-alpha-mongodb-uri"
|
||||
type: Secret
|
||||
- key: rabbitmqPassword
|
||||
remoteRef:
|
||||
key: "freeleaps-alpha-rabbitmq-password"
|
||||
type: Secret
|
||||
- key: appMongodbUri
|
||||
remoteRef:
|
||||
key: freeleaps-alpha-mongodb-uri
|
||||
type: Secret
|
||||
- key: rabbitmqPassword
|
||||
remoteRef:
|
||||
key: freeleaps-alpha-rabbitmq-password
|
||||
type: Secret
|
||||
vpa:
|
||||
minAllowed:
|
||||
enabled: false
|
||||
|
||||
@ -25,8 +25,8 @@ data:
|
||||
FREELEAPS_AUTHENTICATION_ENDPOINT: {{ .Values.freeleaps.configs.freeleapsAuthenticationEndpoint | b64enc | quote }}
|
||||
FREELEAPS_AILAB_ENDPOINT: {{ .Values.freeleaps.configs.freeleapsAilabEndpoint | b64enc | quote }}
|
||||
FREELEAPS_NOTIFICATION_ENDPOINT: {{ .Values.freeleaps.configs.freeleapsNotificationEndpoint | b64enc | quote }}
|
||||
FREELEAPS_ENV: {{ .Values.freeleaps.configs.freeleapsEnv | b64enc | quote }}
|
||||
CERT_PATH: {{ .Values.freeleaps.configs.certPath | b64enc | quote }}
|
||||
FREELEAPS_DEVOPS_ENDPOINT: {{ .Values.freeleaps.configs.freeleapsDevopsEndpoint | b64enc | quote }}
|
||||
APP_ENV: {{ .Values.freeleaps.configs.appEnv | b64enc | quote }}
|
||||
REDIS_IS_CLUSTER: {{ .Values.freeleaps.configs.redisIsCluster | b64enc | quote }}
|
||||
METRICS_ENABLED: {{ .Values.freeleaps.configs.metricsEnabled | default false | toString | b64enc }}
|
||||
PROBES_ENABLED: {{ .Values.freeleaps.configs.probesEnabled | default false | toString | b64enc }}
|
||||
|
||||
@ -16,7 +16,7 @@ freeleaps:
|
||||
registry: docker.io
|
||||
repository: null
|
||||
name: backend
|
||||
tag: snapshot-9c7bb61
|
||||
tag: snapshot-29f543c
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: http
|
||||
@ -75,9 +75,9 @@ freeleaps:
|
||||
freeleapsPaymentEndpoint: http://payment-service.freeleaps-alpha.svc.freeleaps.cluster:8006/api/payment/
|
||||
freeleapsAuthenticationEndpoint: http://authentication-service.freeleaps-alpha.svc.freeleaps.cluster:8004/api/auth/
|
||||
freeleapsNotificationEndpoint: http://notification-service.freeleaps-alpha.svc.freeleaps.cluster:8003/api/notification/
|
||||
freeleapsDevopsEndpoint: http://devops-service.freeleaps-alpha.svc.freeleaps.cluster:8014/api
|
||||
freeleapsAilabEndpoint: ''
|
||||
freeleapsEnv: alpha
|
||||
certPath: ''
|
||||
appEnv: alpha
|
||||
redisIsCluster: 'false'
|
||||
metricsEnabled: 'false'
|
||||
probesEnabled: 'true'
|
||||
|
||||
@ -66,9 +66,10 @@ freeleaps:
|
||||
freeleapsPaymentEndpoint: http://payment-service.freeleaps-prod.svc.freeleaps.cluster:8006/api/payment/
|
||||
freeleapsAuthenticationEndpoint: http://authentication-service.freeleaps-prod.svc.freeleaps.cluster:8004/api/auth/
|
||||
freeleapsNotificationEndpoint: http://notification-service.freeleaps-prod.svc.freeleaps.cluster:8003/api/notification/
|
||||
freeleapsDevopsEndpoint: http://devops-service.freeleaps-prod.svc.freeleaps.cluster:8014/api
|
||||
freeleapsAilabEndpoint: ''
|
||||
freeleapsEnv: alpha
|
||||
certPath: ''
|
||||
appEnv: prod
|
||||
|
||||
redisIsCluster: 'true'
|
||||
metricsEnabled: 'true'
|
||||
probesEnabled: 'true'
|
||||
@ -82,38 +83,38 @@ freeleaps:
|
||||
creationPolicy: Owner
|
||||
refreshInterval: 30s
|
||||
data:
|
||||
- key: mongodbUri
|
||||
remoteRef:
|
||||
key: freeleaps-prod-mongodb-uri
|
||||
type: Secret
|
||||
- key: jwtSecretKey
|
||||
remoteRef:
|
||||
key: freeleaps-prod-jwt-secret-key
|
||||
type: Secret
|
||||
- key: stripeApiKey
|
||||
remoteRef:
|
||||
key: freeleaps-prod-stripe-api-key
|
||||
type: Secret
|
||||
- key: stripeWebhookSecret
|
||||
remoteRef:
|
||||
key: freeleaps-prod-stripe-webhook-secret
|
||||
type: Secret
|
||||
- key: stripeAccountWebhookSecret
|
||||
remoteRef:
|
||||
key: freeleaps-prod-stripe-account-webhook-secret
|
||||
type: Secret
|
||||
- key: rabbitmqPassword
|
||||
remoteRef:
|
||||
key: freeleaps-prod-rabbitmq-password
|
||||
type: Secret
|
||||
- key: redisUrl
|
||||
remoteRef:
|
||||
key: freeleaps-prod-redis-url
|
||||
type: Secret
|
||||
- key: giteaApiKey
|
||||
remoteRef:
|
||||
key: freeleaps-prod-gitea-api-key
|
||||
type: Secret
|
||||
- key: mongodbUri
|
||||
remoteRef:
|
||||
key: freeleaps-prod-mongodb-uri
|
||||
type: Secret
|
||||
- key: jwtSecretKey
|
||||
remoteRef:
|
||||
key: freeleaps-prod-jwt-secret-key
|
||||
type: Secret
|
||||
- key: stripeApiKey
|
||||
remoteRef:
|
||||
key: freeleaps-prod-stripe-api-key
|
||||
type: Secret
|
||||
- key: stripeWebhookSecret
|
||||
remoteRef:
|
||||
key: freeleaps-prod-stripe-webhook-secret
|
||||
type: Secret
|
||||
- key: stripeAccountWebhookSecret
|
||||
remoteRef:
|
||||
key: freeleaps-prod-stripe-account-webhook-secret
|
||||
type: Secret
|
||||
- key: rabbitmqPassword
|
||||
remoteRef:
|
||||
key: freeleaps-prod-rabbitmq-password
|
||||
type: Secret
|
||||
- key: redisUrl
|
||||
remoteRef:
|
||||
key: freeleaps-prod-redis-url
|
||||
type: Secret
|
||||
- key: giteaApiKey
|
||||
remoteRef:
|
||||
key: freeleaps-prod-gitea-api-key
|
||||
type: Secret
|
||||
vpa:
|
||||
minAllowed:
|
||||
enabled: true
|
||||
|
||||
@ -89,10 +89,8 @@ freeleaps:
|
||||
freeleapsNotificationEndpoint: ""
|
||||
# FREELEAPS_AILAB_ENDPOINT
|
||||
freeleapsAilabEndpoint: ""
|
||||
# FREELEAPS_ENV
|
||||
freeleapsEnv: ""
|
||||
# CERT_PATH
|
||||
certPath: ""
|
||||
# APP_ENV
|
||||
appEnv: ""
|
||||
# REDIS_IS_CLUSTER
|
||||
redisIsCluster: "false"
|
||||
# METRICS_ENABLED
|
||||
|
||||
@ -8,7 +8,7 @@ frontend:
|
||||
registry: docker.io
|
||||
repository: null
|
||||
name: frontend
|
||||
tag: snapshot-9c7bb61
|
||||
tag: snapshot-29f543c
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: http
|
||||
|
||||
@ -15,7 +15,7 @@ notification:
|
||||
registry: docker.io
|
||||
repository: null
|
||||
name: notification
|
||||
tag: snapshot-165e10c
|
||||
tag: snapshot-4f09a5e
|
||||
imagePullPolicy: IfNotPresent
|
||||
ports:
|
||||
- name: http
|
||||
|
||||
@ -1,24 +0,0 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: magicleaps-alpha
|
||||
namespace: freeleaps-devops-system
|
||||
spec:
|
||||
destination:
|
||||
name: ''
|
||||
namespace: magicleaps-alpha
|
||||
server: https://kubernetes.default.svc
|
||||
source:
|
||||
path: magicleaps/helm-pkg/magicleaps
|
||||
repoURL: https://freeleaps@dev.azure.com/freeleaps/freeleaps-ops/_git/freeleaps-ops
|
||||
targetRevision: HEAD
|
||||
helm:
|
||||
parameters: []
|
||||
valueFiles:
|
||||
- values.alpha.yaml
|
||||
sources: []
|
||||
project: magicleaps-alpha
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
@ -1,10 +0,0 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: AppProject
|
||||
metadata:
|
||||
name: magicleaps-alpha
|
||||
namespace: freeleaps-devops-system
|
||||
spec:
|
||||
destinations:
|
||||
- name: in-cluster
|
||||
namespace: magicleaps-alpha
|
||||
server: https://kubernetes.default.svc
|
||||
@ -1,7 +1,7 @@
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: magicleaps-authentication-config
|
||||
name: {{ .Release.Name }}-authentication-config
|
||||
namespace: {{ .Release.Namespace }}
|
||||
type: Opaque
|
||||
data:
|
||||
|
||||
@ -10,7 +10,7 @@ metadata:
|
||||
annotations:
|
||||
opentelemetry.io/config-checksum: {{ include (print $.Template.BasePath "/authentication/opentelemetry.yaml") . | sha256sum }}
|
||||
{{- end }}
|
||||
name: "magicleaps-authentication"
|
||||
name: "{{ .Release.Name }}"
|
||||
namespace: {{ .Release.Namespace | quote }}
|
||||
spec:
|
||||
selector:
|
||||
|
||||
@ -54,14 +54,14 @@ authentication:
|
||||
failureThreshold: 3
|
||||
terminationGracePeriodSeconds: 30
|
||||
services:
|
||||
- name: magicleaps-authentication-service
|
||||
- name: magicleaps-authentication-alpha-service
|
||||
type: ClusterIP
|
||||
port: 8015
|
||||
targetPort: 8015
|
||||
serviceMonitor:
|
||||
enabled: false
|
||||
ingresses:
|
||||
- name: magicleaps-authentication-ingress
|
||||
- name: magicleaps-authentication-alpha-ingress
|
||||
host: authentication.alpha.magicleaps.mathmast.com
|
||||
class: nginx
|
||||
rules:
|
||||
@ -69,7 +69,7 @@ authentication:
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: magicleaps-authentication-service
|
||||
name: magicleaps-authentication-alpha-service
|
||||
port:
|
||||
number: 8015
|
||||
tls:
|
||||
@ -95,17 +95,17 @@ authentication:
|
||||
kind: FreeleapsSecretStore
|
||||
name: freeleaps-main-secret-store
|
||||
target:
|
||||
name: "freeleaps-authentication-alpha-secrets"
|
||||
name: "magicleaps-authentication-alpha-secrets"
|
||||
creationPolicy: "Owner"
|
||||
refreshInterval: 30s
|
||||
data:
|
||||
- key: jwtSecretKey
|
||||
remoteRef:
|
||||
key: "freeleaps-alpha-jwt-secret-key"
|
||||
key: "magicleaps-alpha-jwt-secret-key"
|
||||
type: Secret
|
||||
- key: mongodbUri
|
||||
remoteRef:
|
||||
key: "freeleaps-alpha-mongodb-uri"
|
||||
key: "magicleaps-alpha-mongodb-uri"
|
||||
type: Secret
|
||||
vpa:
|
||||
minAllowed:
|
||||
|
||||
@ -13,7 +13,7 @@ logIngest:
|
||||
logPathPattern: /app/log/authentication/*.log
|
||||
logPath: /app/log/authentication
|
||||
authentication:
|
||||
replicas: 2
|
||||
replicas: 1
|
||||
image:
|
||||
registry: docker.io
|
||||
repository: null
|
||||
@ -54,22 +54,22 @@ authentication:
|
||||
failureThreshold: 3
|
||||
terminationGracePeriodSeconds: 30
|
||||
services:
|
||||
- name: magicleaps-authentication-service
|
||||
- name: magicleaps-authentication-prod-service
|
||||
type: ClusterIP
|
||||
port: 8015
|
||||
targetPort: 8015
|
||||
serviceMonitor:
|
||||
enabled: false
|
||||
ingresses:
|
||||
- name: magicleaps-authentication-ingress
|
||||
host: authentication.magicleaps.mathmast.com
|
||||
- name: magicleaps-authentication-prod-ingress
|
||||
host: authentication.prod.magicleaps.mathmast.com
|
||||
class: nginx
|
||||
rules:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: magicleaps-authentication-service
|
||||
name: magicleaps-authentication-prod-service
|
||||
port:
|
||||
number: 8015
|
||||
tls:
|
||||
@ -77,7 +77,7 @@ authentication:
|
||||
issuerRef:
|
||||
name: mathmast-dot-com
|
||||
kind: ClusterIssuer
|
||||
name: magicleaps-authentication-ingress-tls
|
||||
name: magicleaps-authentication-prod-ingress-tls
|
||||
configs:
|
||||
tz: America/Settle
|
||||
appName: magicleaps-authentication
|
||||
@ -95,17 +95,17 @@ authentication:
|
||||
kind: FreeleapsSecretStore
|
||||
name: freeleaps-main-secret-store
|
||||
target:
|
||||
name: "freeleaps-authentication-prod-secrets"
|
||||
name: "magicleaps-authentication-prod-secrets"
|
||||
creationPolicy: "Owner"
|
||||
refreshInterval: 30s
|
||||
data:
|
||||
- key: jwtSecretKey
|
||||
remoteRef:
|
||||
key: "freeleaps-prod-jwt-secret-key"
|
||||
key: "magicleaps-prod-jwt-secret-key"
|
||||
type: Secret
|
||||
- key: mongodbUri
|
||||
remoteRef:
|
||||
key: "freeleaps-prod-mongodb-uri"
|
||||
key: "magicleaps-prod-mongodb-uri"
|
||||
type: Secret
|
||||
vpa:
|
||||
minAllowed:
|
||||
|
||||
@ -62,7 +62,7 @@ authentication:
|
||||
enabled: false
|
||||
ingresses:
|
||||
- name: magicleaps-authentication-ingress
|
||||
host: authentication.magicleaps.mathmast.com
|
||||
host: authentication.default.magicleaps.mathmast.com
|
||||
class: nginx
|
||||
rules:
|
||||
- path: /
|
||||
|
||||
@ -28,6 +28,10 @@ spec:
|
||||
- name: "backend"
|
||||
image: "{{ coalesce .Values.backend.image.registry .Values.global.registry "docker.io"}}/{{ coalesce .Values.backend.image.repository .Values.global.repository }}/{{ .Values.backend.image.name }}:{{ .Values.backend.image.tag | default "latest" }}"
|
||||
imagePullPolicy: {{ .Values.backend.image.imagePullPolicy | default "IfNotPresent" }}
|
||||
{{- if .Values.backend.securityContext }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.backend.securityContext | nindent 12 }}
|
||||
{{- end }}
|
||||
ports:
|
||||
{{- range $port := .Values.backend.ports }}
|
||||
- containerPort: {{ $port.containerPort }}
|
||||
@ -96,3 +100,58 @@ spec:
|
||||
name: magicleaps-backend-config
|
||||
key: {{ $key | snakecase | upper }}
|
||||
{{- end }}
|
||||
{{- if .Values.logIngest.enabled }}
|
||||
volumeMounts:
|
||||
- name: app-logs
|
||||
mountPath: {{ .Values.logIngest.backendLogPath }}
|
||||
{{- end }}
|
||||
{{- if .Values.logIngest.enabled }}
|
||||
- name: opentelemetry-collector
|
||||
image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:latest
|
||||
command:
|
||||
- /otelcol-contrib
|
||||
- --config=/etc/otelcol-contrib/otelcol-contrib.yaml
|
||||
volumeMounts:
|
||||
- name: app-logs
|
||||
mountPath: {{ .Values.logIngest.backendLogPath }}
|
||||
- name: otelcol-config
|
||||
mountPath: /etc/otelcol-contrib
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: true
|
||||
privileged: true
|
||||
runAsUser: 0
|
||||
runAsGroup: 0
|
||||
env:
|
||||
- name: KUBE_META_POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
- name: KUBE_META_NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
- name: KUBE_META_NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
- name: KUBE_META_POD_IP
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.podIP
|
||||
- name: KUBE_META_POD_UID
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.uid
|
||||
- name: KUBE_META_OBJECT_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.labels['app.kubernetes.io/instance']
|
||||
{{- end }}
|
||||
volumes:
|
||||
{{- if .Values.logIngest.enabled }}
|
||||
- name: app-logs
|
||||
emptyDir: {}
|
||||
- name: otelcol-config
|
||||
configMap:
|
||||
name: {{ .Release.Name }}-backend-otelcol-config
|
||||
{{- end }}
|
||||
@ -0,0 +1,53 @@
|
||||
{{- if .Values.logIngest.enabled }}
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-backend-otel-collector
|
||||
namespace: {{ .Release.Namespace }}
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-backend-otel-collector
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["nodes", "nodes/proxy", "services", "endpoints", "pods", "events"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["apps"]
|
||||
resources: ["replicasets", "deployments"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["extensions"]
|
||||
resources: ["replicasets"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["apps"]
|
||||
resources: ["statefulsets"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["batch"]
|
||||
resources: ["jobs"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["autoscaling"]
|
||||
resources: ["horizontalpodautoscalers"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["networking.k8s.io"]
|
||||
resources: ["ingresses"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["authentication.k8s.io"]
|
||||
resources: ["tokenreviews"]
|
||||
verbs: ["create"]
|
||||
- apiGroups: ["authorization.k8s.io"]
|
||||
resources: ["subjectaccessreviews"]
|
||||
verbs: ["create"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-backend-otel-collector
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: {{ .Release.Name }}-backend-otel-collector
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: {{ .Release.Name }}-backend-otel-collector
|
||||
namespace: {{ .Release.Namespace }}
|
||||
{{- end }}
|
||||
@ -0,0 +1,123 @@
|
||||
{{- if .Values.logIngest.enabled }}
|
||||
apiVersion: opentelemetry.io/v1beta1
|
||||
kind: OpenTelemetryCollector
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-backend-opentelemetry-collector
|
||||
namespace: {{ .Release.Namespace }}
|
||||
spec:
|
||||
mode: sidecar
|
||||
image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:latest
|
||||
serviceAccount: "{{ .Release.Name }}-backend-otel-collector"
|
||||
volumeMounts:
|
||||
- name: app-logs
|
||||
mountPath: {{ .Values.logIngest.backendLogPath }}
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: true
|
||||
privileged: true
|
||||
runAsUser: 0
|
||||
runAsGroup: 0
|
||||
env:
|
||||
- name: KUBE_META_POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
- name: KUBE_META_NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
- name: KUBE_META_NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
- name: KUBE_META_POD_IP
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.podIP
|
||||
- name: KUBE_META_POD_UID
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.uid
|
||||
- name: KUBE_META_OBJECT_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.labels['app.kubernetes.io/instance']
|
||||
config:
|
||||
receivers:
|
||||
filelog:
|
||||
include:
|
||||
- {{ .Values.logIngest.backendLogPathPattern }}
|
||||
start_at: end
|
||||
include_file_path: false
|
||||
include_file_name: false
|
||||
operators:
|
||||
- type: json_parser
|
||||
parse_from: body
|
||||
parse_to: attributes
|
||||
processors:
|
||||
resource:
|
||||
attributes:
|
||||
- action: insert
|
||||
key: k8s.node.name
|
||||
value: ${KUBE_META_NODE_NAME}
|
||||
- action: insert
|
||||
key: k8s.pod.name
|
||||
value: ${KUBE_META_POD_NAME}
|
||||
- action: insert
|
||||
key: k8s.pod.ip
|
||||
value: ${KUBE_META_POD_IP}
|
||||
- action: insert
|
||||
key: k8s.pod.uid
|
||||
value: ${KUBE_META_POD_UID}
|
||||
- action: insert
|
||||
key: k8s.namespace.name
|
||||
value: ${KUBE_META_NAMESPACE}
|
||||
- action: insert
|
||||
key: k8s.deployment.name
|
||||
value: ${KUBE_META_OBJECT_NAME}
|
||||
- action: insert
|
||||
key: service.name
|
||||
value: magicleaps-backend
|
||||
- action: insert
|
||||
key: service.component
|
||||
value: backend
|
||||
transform:
|
||||
log_statements:
|
||||
- context: log
|
||||
statements:
|
||||
- set(resource.attributes["application"], log.attributes["context"]["app"])
|
||||
- set(resource.attributes["environment"], log.attributes["context"]["env"])
|
||||
- set(resource.attributes["kubernetes_node_name"], resource.attributes["k8s.node.name"])
|
||||
- set(resource.attributes["kubernetes_pod_name"], resource.attributes["k8s.pod.name"])
|
||||
- set(resource.attributes["kubernetes_pod_ip"], resource.attributes["k8s.pod.ip"])
|
||||
- set(resource.attributes["kubernetes_deployment_name"], resource.attributes["k8s.deployment.name"])
|
||||
- set(resource.attributes["kubernetes_namespace"], resource.attributes["k8s.namespace.name"])
|
||||
- set(resource.attributes["body_json"], ParseJSON(log.body))
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["pod"], resource.attributes["k8s.pod.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["namespace"], resource.attributes["k8s.namespace.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["pod_ip"], resource.attributes["k8s.pod.ip"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["pod_uid"], resource.attributes["k8s.pod.uid"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["deployment"], resource.attributes["k8s.pod.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["node"], resource.attributes["k8s.node.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["namespace"], resource.attributes["k8s.namespace.name"])
|
||||
- set(resource.attributes["body_json"]["service"]["name"], "magicleaps-backend")
|
||||
- set(resource.attributes["body_json"]["service"]["component"], "backend")
|
||||
- set(log.body, resource.attributes["body_json"])
|
||||
- delete_key(resource.attributes, "body_json")
|
||||
batch:
|
||||
send_batch_size: 5
|
||||
timeout: 10s
|
||||
exporters:
|
||||
otlphttp/logs:
|
||||
endpoint: {{ .Values.logIngest.lokiEndpoint }}/otlp
|
||||
tls:
|
||||
insecure: true
|
||||
service:
|
||||
telemetry:
|
||||
logs:
|
||||
level: info
|
||||
pipelines:
|
||||
logs:
|
||||
receivers: [filelog]
|
||||
processors: [resource, transform, batch]
|
||||
exporters: [otlphttp/logs]
|
||||
{{- end }}
|
||||
@ -0,0 +1,87 @@
|
||||
{{- if .Values.logIngest.enabled }}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-backend-otelcol-config
|
||||
namespace: {{ .Release.Namespace }}
|
||||
data:
|
||||
otelcol-contrib.yaml: |
|
||||
receivers:
|
||||
filelog:
|
||||
include:
|
||||
- {{ .Values.logIngest.backendLogPathPattern }}
|
||||
start_at: end
|
||||
include_file_path: false
|
||||
include_file_name: false
|
||||
operators:
|
||||
- type: json_parser
|
||||
parse_from: body
|
||||
parse_to: attributes
|
||||
processors:
|
||||
resource:
|
||||
attributes:
|
||||
- action: insert
|
||||
key: k8s.node.name
|
||||
value: ${KUBE_META_NODE_NAME}
|
||||
- action: insert
|
||||
key: k8s.pod.name
|
||||
value: ${KUBE_META_POD_NAME}
|
||||
- action: insert
|
||||
key: k8s.pod.ip
|
||||
value: ${KUBE_META_POD_IP}
|
||||
- action: insert
|
||||
key: k8s.pod.uid
|
||||
value: ${KUBE_META_POD_UID}
|
||||
- action: insert
|
||||
key: k8s.namespace.name
|
||||
value: ${KUBE_META_NAMESPACE}
|
||||
- action: insert
|
||||
key: k8s.deployment.name
|
||||
value: ${KUBE_META_OBJECT_NAME}
|
||||
- action: insert
|
||||
key: service.name
|
||||
value: magicleaps-backend
|
||||
- action: insert
|
||||
key: service.component
|
||||
value: backend
|
||||
transform:
|
||||
log_statements:
|
||||
- context: log
|
||||
statements:
|
||||
- set(resource.attributes["application"], log.attributes["context"]["app"])
|
||||
- set(resource.attributes["environment"], log.attributes["context"]["env"])
|
||||
- set(resource.attributes["kubernetes_node_name"], resource.attributes["k8s.node.name"])
|
||||
- set(resource.attributes["kubernetes_pod_name"], resource.attributes["k8s.pod.name"])
|
||||
- set(resource.attributes["kubernetes_pod_ip"], resource.attributes["k8s.pod.ip"])
|
||||
- set(resource.attributes["kubernetes_deployment_name"], resource.attributes["k8s.deployment.name"])
|
||||
- set(resource.attributes["kubernetes_namespace"], resource.attributes["k8s.namespace.name"])
|
||||
- set(resource.attributes["body_json"], ParseJSON(log.body))
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["pod"], resource.attributes["k8s.pod.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["namespace"], resource.attributes["k8s.namespace.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["pod_ip"], resource.attributes["k8s.pod.ip"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["pod_uid"], resource.attributes["k8s.pod.uid"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["deployment"], resource.attributes["k8s.pod.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["node"], resource.attributes["k8s.node.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["namespace"], resource.attributes["k8s.namespace.name"])
|
||||
- set(resource.attributes["body_json"]["service"]["name"], "magicleaps-backend")
|
||||
- set(resource.attributes["body_json"]["service"]["component"], "backend")
|
||||
- set(log.body, resource.attributes["body_json"])
|
||||
- delete_key(resource.attributes, "body_json")
|
||||
batch:
|
||||
send_batch_size: 5
|
||||
timeout: 10s
|
||||
exporters:
|
||||
otlphttp/logs:
|
||||
endpoint: {{ .Values.logIngest.lokiEndpoint }}/otlp
|
||||
tls:
|
||||
insecure: true
|
||||
service:
|
||||
telemetry:
|
||||
logs:
|
||||
level: info
|
||||
pipelines:
|
||||
logs:
|
||||
receivers: [filelog]
|
||||
processors: [resource, transform, batch]
|
||||
exporters: [otlphttp/logs]
|
||||
{{- end }}
|
||||
@ -96,3 +96,58 @@ spec:
|
||||
name: magicleaps-frontend-config
|
||||
key: {{ $key | snakecase | upper }}
|
||||
{{- end }}
|
||||
{{- if .Values.logIngest.enabled }}
|
||||
volumeMounts:
|
||||
- name: app-logs
|
||||
mountPath: {{ .Values.logIngest.frontendLogPath }}
|
||||
{{- end }}
|
||||
{{- if .Values.logIngest.enabled }}
|
||||
- name: opentelemetry-collector
|
||||
image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:latest
|
||||
command:
|
||||
- /otelcol-contrib
|
||||
- --config=/etc/otelcol-contrib/otelcol-contrib.yaml
|
||||
volumeMounts:
|
||||
- name: app-logs
|
||||
mountPath: {{ .Values.logIngest.frontendLogPath }}
|
||||
- name: otelcol-config
|
||||
mountPath: /etc/otelcol-contrib
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: true
|
||||
privileged: true
|
||||
runAsUser: 0
|
||||
runAsGroup: 0
|
||||
env:
|
||||
- name: KUBE_META_POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
- name: KUBE_META_NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
- name: KUBE_META_NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
- name: KUBE_META_POD_IP
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.podIP
|
||||
- name: KUBE_META_POD_UID
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.uid
|
||||
- name: KUBE_META_OBJECT_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.labels['app.kubernetes.io/instance']
|
||||
{{- end }}
|
||||
volumes:
|
||||
{{- if .Values.logIngest.enabled }}
|
||||
- name: app-logs
|
||||
emptyDir: {}
|
||||
- name: otelcol-config
|
||||
configMap:
|
||||
name: {{ .Release.Name }}-frontend-otelcol-config
|
||||
{{- end }}
|
||||
@ -0,0 +1,53 @@
|
||||
{{- if .Values.logIngest.enabled }}
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-frontend-otel-collector
|
||||
namespace: {{ .Release.Namespace }}
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-frontend-otel-collector
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["nodes", "nodes/proxy", "services", "endpoints", "pods", "events"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["apps"]
|
||||
resources: ["replicasets", "deployments"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["extensions"]
|
||||
resources: ["replicasets"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["apps"]
|
||||
resources: ["statefulsets"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["batch"]
|
||||
resources: ["jobs"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["autoscaling"]
|
||||
resources: ["horizontalpodautoscalers"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["networking.k8s.io"]
|
||||
resources: ["ingresses"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["authentication.k8s.io"]
|
||||
resources: ["tokenreviews"]
|
||||
verbs: ["create"]
|
||||
- apiGroups: ["authorization.k8s.io"]
|
||||
resources: ["subjectaccessreviews"]
|
||||
verbs: ["create"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-frontend-otel-collector
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: {{ .Release.Name }}-frontend-otel-collector
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: {{ .Release.Name }}-frontend-otel-collector
|
||||
namespace: {{ .Release.Namespace }}
|
||||
{{- end }}
|
||||
@ -0,0 +1,123 @@
|
||||
{{- if .Values.logIngest.enabled }}
|
||||
apiVersion: opentelemetry.io/v1beta1
|
||||
kind: OpenTelemetryCollector
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-frontend-opentelemetry-collector
|
||||
namespace: {{ .Release.Namespace }}
|
||||
spec:
|
||||
mode: sidecar
|
||||
image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:latest
|
||||
serviceAccount: "{{ .Release.Name }}-frontend-otel-collector"
|
||||
volumeMounts:
|
||||
- name: app-logs
|
||||
mountPath: {{ .Values.logIngest.frontendLogPath }}
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: true
|
||||
privileged: true
|
||||
runAsUser: 0
|
||||
runAsGroup: 0
|
||||
env:
|
||||
- name: KUBE_META_POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
- name: KUBE_META_NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
- name: KUBE_META_NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
- name: KUBE_META_POD_IP
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.podIP
|
||||
- name: KUBE_META_POD_UID
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.uid
|
||||
- name: KUBE_META_OBJECT_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.labels['app.kubernetes.io/instance']
|
||||
config:
|
||||
receivers:
|
||||
filelog:
|
||||
include:
|
||||
- {{ .Values.logIngest.frontendLogPathPattern }}
|
||||
start_at: end
|
||||
include_file_path: false
|
||||
include_file_name: false
|
||||
operators:
|
||||
- type: json_parser
|
||||
parse_from: body
|
||||
parse_to: attributes
|
||||
processors:
|
||||
resource:
|
||||
attributes:
|
||||
- action: insert
|
||||
key: k8s.node.name
|
||||
value: ${KUBE_META_NODE_NAME}
|
||||
- action: insert
|
||||
key: k8s.pod.name
|
||||
value: ${KUBE_META_POD_NAME}
|
||||
- action: insert
|
||||
key: k8s.pod.ip
|
||||
value: ${KUBE_META_POD_IP}
|
||||
- action: insert
|
||||
key: k8s.pod.uid
|
||||
value: ${KUBE_META_POD_UID}
|
||||
- action: insert
|
||||
key: k8s.namespace.name
|
||||
value: ${KUBE_META_NAMESPACE}
|
||||
- action: insert
|
||||
key: k8s.deployment.name
|
||||
value: ${KUBE_META_OBJECT_NAME}
|
||||
- action: insert
|
||||
key: service.name
|
||||
value: magicleaps-frontend
|
||||
- action: insert
|
||||
key: service.component
|
||||
value: frontend
|
||||
transform:
|
||||
log_statements:
|
||||
- context: log
|
||||
statements:
|
||||
- set(resource.attributes["application"], log.attributes["context"]["app"])
|
||||
- set(resource.attributes["environment"], log.attributes["context"]["env"])
|
||||
- set(resource.attributes["kubernetes_node_name"], resource.attributes["k8s.node.name"])
|
||||
- set(resource.attributes["kubernetes_pod_name"], resource.attributes["k8s.pod.name"])
|
||||
- set(resource.attributes["kubernetes_pod_ip"], resource.attributes["k8s.pod.ip"])
|
||||
- set(resource.attributes["kubernetes_deployment_name"], resource.attributes["k8s.deployment.name"])
|
||||
- set(resource.attributes["kubernetes_namespace"], resource.attributes["k8s.namespace.name"])
|
||||
- set(resource.attributes["body_json"], ParseJSON(log.body))
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["pod"], resource.attributes["k8s.pod.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["namespace"], resource.attributes["k8s.namespace.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["pod_ip"], resource.attributes["k8s.pod.ip"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["pod_uid"], resource.attributes["k8s.pod.uid"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["deployment"], resource.attributes["k8s.pod.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["node"], resource.attributes["k8s.node.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["namespace"], resource.attributes["k8s.namespace.name"])
|
||||
- set(resource.attributes["body_json"]["service"]["name"], "magicleaps-frontend")
|
||||
- set(resource.attributes["body_json"]["service"]["component"], "frontend")
|
||||
- set(log.body, resource.attributes["body_json"])
|
||||
- delete_key(resource.attributes, "body_json")
|
||||
batch:
|
||||
send_batch_size: 5
|
||||
timeout: 10s
|
||||
exporters:
|
||||
otlphttp/logs:
|
||||
endpoint: {{ .Values.logIngest.lokiEndpoint }}/otlp
|
||||
tls:
|
||||
insecure: true
|
||||
service:
|
||||
telemetry:
|
||||
logs:
|
||||
level: info
|
||||
pipelines:
|
||||
logs:
|
||||
receivers: [filelog]
|
||||
processors: [resource, transform, batch]
|
||||
exporters: [otlphttp/logs]
|
||||
{{- end }}
|
||||
@ -0,0 +1,87 @@
|
||||
{{- if .Values.logIngest.enabled }}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ .Release.Name }}-frontend-otelcol-config
|
||||
namespace: {{ .Release.Namespace }}
|
||||
data:
|
||||
otelcol-contrib.yaml: |
|
||||
receivers:
|
||||
filelog:
|
||||
include:
|
||||
- {{ .Values.logIngest.frontendLogPathPattern }}
|
||||
start_at: end
|
||||
include_file_path: false
|
||||
include_file_name: false
|
||||
operators:
|
||||
- type: json_parser
|
||||
parse_from: body
|
||||
parse_to: attributes
|
||||
processors:
|
||||
resource:
|
||||
attributes:
|
||||
- action: insert
|
||||
key: k8s.node.name
|
||||
value: ${KUBE_META_NODE_NAME}
|
||||
- action: insert
|
||||
key: k8s.pod.name
|
||||
value: ${KUBE_META_POD_NAME}
|
||||
- action: insert
|
||||
key: k8s.pod.ip
|
||||
value: ${KUBE_META_POD_IP}
|
||||
- action: insert
|
||||
key: k8s.pod.uid
|
||||
value: ${KUBE_META_POD_UID}
|
||||
- action: insert
|
||||
key: k8s.namespace.name
|
||||
value: ${KUBE_META_NAMESPACE}
|
||||
- action: insert
|
||||
key: k8s.deployment.name
|
||||
value: ${KUBE_META_OBJECT_NAME}
|
||||
- action: insert
|
||||
key: service.name
|
||||
value: magicleaps-frontend
|
||||
- action: insert
|
||||
key: service.component
|
||||
value: frontend
|
||||
transform:
|
||||
log_statements:
|
||||
- context: log
|
||||
statements:
|
||||
- set(resource.attributes["application"], log.attributes["context"]["app"])
|
||||
- set(resource.attributes["environment"], log.attributes["context"]["env"])
|
||||
- set(resource.attributes["kubernetes_node_name"], resource.attributes["k8s.node.name"])
|
||||
- set(resource.attributes["kubernetes_pod_name"], resource.attributes["k8s.pod.name"])
|
||||
- set(resource.attributes["kubernetes_pod_ip"], resource.attributes["k8s.pod.ip"])
|
||||
- set(resource.attributes["kubernetes_deployment_name"], resource.attributes["k8s.deployment.name"])
|
||||
- set(resource.attributes["kubernetes_namespace"], resource.attributes["k8s.namespace.name"])
|
||||
- set(resource.attributes["body_json"], ParseJSON(log.body))
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["pod"], resource.attributes["k8s.pod.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["namespace"], resource.attributes["k8s.namespace.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["pod_ip"], resource.attributes["k8s.pod.ip"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["pod_uid"], resource.attributes["k8s.pod.uid"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["deployment"], resource.attributes["k8s.pod.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["node"], resource.attributes["k8s.node.name"])
|
||||
- set(resource.attributes["body_json"]["kubernetes"]["namespace"], resource.attributes["k8s.namespace.name"])
|
||||
- set(resource.attributes["body_json"]["service"]["name"], "magicleaps-frontend")
|
||||
- set(resource.attributes["body_json"]["service"]["component"], "frontend")
|
||||
- set(log.body, resource.attributes["body_json"])
|
||||
- delete_key(resource.attributes, "body_json")
|
||||
batch:
|
||||
send_batch_size: 5
|
||||
timeout: 10s
|
||||
exporters:
|
||||
otlphttp/logs:
|
||||
endpoint: {{ .Values.logIngest.lokiEndpoint }}/otlp
|
||||
tls:
|
||||
insecure: true
|
||||
service:
|
||||
telemetry:
|
||||
logs:
|
||||
level: info
|
||||
pipelines:
|
||||
logs:
|
||||
receivers: [filelog]
|
||||
processors: [resource, transform, batch]
|
||||
exporters: [otlphttp/logs]
|
||||
{{- end }}
|
||||
@ -2,6 +2,13 @@ global:
|
||||
registry: docker.io
|
||||
repository: sunzhenyucn
|
||||
nodeSelector: {}
|
||||
logIngest:
|
||||
enabled: true
|
||||
lokiEndpoint: http://loki-gateway.magicleaps-logging-system
|
||||
backendLogPathPattern: /app/log/*.log
|
||||
backendLogPath: /app/log
|
||||
frontendLogPathPattern: /app/logs/*.log
|
||||
frontendLogPath: /app/logs
|
||||
frontend:
|
||||
replicas: 1
|
||||
image:
|
||||
@ -65,6 +72,12 @@ backend:
|
||||
name: magicleaps-backend
|
||||
tag: snapshot-004a6c7
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
privileged: true
|
||||
runAsUser: 0
|
||||
runAsGroup: 0
|
||||
allowPrivilegeEscalation: true
|
||||
readOnlyRootFilesystem: false
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8081
|
||||
@ -104,6 +117,6 @@ backend:
|
||||
twilioAccountSid: ''
|
||||
twilioAuthToken: ''
|
||||
eveluationTaskFolderBase: temp/interview/eveluation_task/
|
||||
logDir: logs
|
||||
logDir: /app/log
|
||||
appLogFile: app.log
|
||||
appLogLevel: INFO
|
||||
|
||||
@ -2,6 +2,13 @@ global:
|
||||
registry: docker.io
|
||||
repository: sunzhenyucn
|
||||
nodeSelector: {}
|
||||
logIngest:
|
||||
enabled: true
|
||||
lokiEndpoint: http://loki-gateway.magicleaps-logging-system
|
||||
backendLogPathPattern: /app/log/*.log
|
||||
backendLogPath: /app/log
|
||||
frontendLogPathPattern: /app/logs/*.log
|
||||
frontendLogPath: /app/logs
|
||||
frontend:
|
||||
replicas: 1
|
||||
image:
|
||||
@ -65,6 +72,12 @@ backend:
|
||||
name: magicleaps-backend
|
||||
tag: 1.0.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
privileged: true
|
||||
runAsUser: 0
|
||||
runAsGroup: 0
|
||||
allowPrivilegeEscalation: true
|
||||
readOnlyRootFilesystem: false
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8081
|
||||
@ -104,6 +117,6 @@ backend:
|
||||
twilioAccountSid: ''
|
||||
twilioAuthToken: ''
|
||||
eveluationTaskFolderBase: temp/interview/eveluation_task/
|
||||
logDir: logs
|
||||
logDir: /app/log
|
||||
appLogFile: app.log
|
||||
appLogLevel: INFO
|
||||
|
||||
@ -3,6 +3,13 @@ global:
|
||||
registry: docker.io
|
||||
repository: sunzhenyucn
|
||||
nodeSelector: {}
|
||||
logIngest:
|
||||
enabled: true
|
||||
lokiEndpoint: http://loki-gateway.magicleaps-logging-system
|
||||
backendLogPathPattern: /app/log/*.log
|
||||
backendLogPath: /app/log
|
||||
frontendLogPathPattern: /app/logs/*.log
|
||||
frontendLogPath: /app/logs
|
||||
frontend:
|
||||
replicas: 1
|
||||
image:
|
||||
@ -66,6 +73,12 @@ backend:
|
||||
name: magicleaps-backend
|
||||
tag: 1.0.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
privileged: true
|
||||
runAsUser: 0
|
||||
runAsGroup: 0
|
||||
allowPrivilegeEscalation: true
|
||||
readOnlyRootFilesystem: false
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8081
|
||||
@ -105,6 +118,6 @@ backend:
|
||||
twilioAccountSid: ""
|
||||
twilioAuthToken: ""
|
||||
eveluationTaskFolderBase: "temp/interview/eveluation_task/"
|
||||
logDir: "logs"
|
||||
logDir: "/app/log"
|
||||
appLogFile: "app.log"
|
||||
appLogLevel: "INFO"
|
||||
@ -1,24 +0,0 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: magicleaps
|
||||
namespace: freeleaps-devops-system
|
||||
spec:
|
||||
destination:
|
||||
name: ""
|
||||
namespace: magicleaps
|
||||
server: https://kubernetes.default.svc
|
||||
source:
|
||||
path: magicleaps/helm-pkg/magicleaps
|
||||
repoURL: https://freeleaps@dev.azure.com/freeleaps/freeleaps-ops/_git/freeleaps-ops
|
||||
targetRevision: HEAD
|
||||
helm:
|
||||
parameters: []
|
||||
valueFiles:
|
||||
- values.prod.yaml
|
||||
sources: []
|
||||
project: magicleaps
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
@ -1,10 +0,0 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: AppProject
|
||||
metadata:
|
||||
name: magicleaps
|
||||
namespace: freeleaps-devops-system
|
||||
spec:
|
||||
destinations:
|
||||
- name: in-cluster
|
||||
namespace: magicleaps
|
||||
server: https://kubernetes.default.svc