feat: Завершена реализация универсальной лаборатории
- Добавлена полная поддержка Istio service mesh с Kiali
- Интегрированы Helm charts (nginx, prometheus-stack)
- Созданы Grafana дашборды для Istio мониторинга
- Добавлен HTML генератор отчетов с красивым дизайном
- Созданы скрипты для снапшотов и восстановления
- Добавлена поддержка Istio Bookinfo demo
- Обновлена документация с полным описанием возможностей

Компоненты:
- Istio с Telemetry и Traffic Policy
- Prometheus + Grafana с автопровижинингом дашбордов
- HTML отчеты с анализом статусов
- Снапшоты и восстановление состояния
- Полная интеграция с Kubernetes

Автор: Сергей Антропов
Сайт: https://devops.org.ru
This commit is contained in:
69
files/grafana/dashboards/istio-overview.json
Normal file
69
files/grafana/dashboards/istio-overview.json
Normal file
@@ -0,0 +1,69 @@
|
||||
{
  "id": null,
  "uid": "istio-overview",
  "title": "Istio • Overview",
  "schemaVersion": 36,
  "version": 1,
  "timezone": "browser",
  "tags": [
    "istio",
    "sre",
    "mesh"
  ],
  "panels": [
    {
      "type": "stat",
      "title": "Global RPS",
      "gridPos": {
        "h": 4,
        "w": 6,
        "x": 0,
        "y": 0
      },
      "targets": [
        {
          "expr": "sum(rate(istio_requests_total{reporter=\"destination\"}[5m]))",
          "legendFormat": "rps"
        }
      ]
    },
    {
      "type": "stat",
      "title": "Success Rate",
      "gridPos": {
        "h": 4,
        "w": 6,
        "x": 6,
        "y": 0
      },
      "targets": [
        {
          "expr": "sum(rate(istio_requests_total{reporter=\"destination\",response_code!~\"5..\"}[5m])) / sum(rate(istio_requests_total{reporter=\"destination\"}[5m]))",
          "legendFormat": "success"
        }
      ],
      "options": {
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ]
        },
        "colorMode": "value",
        "graphMode": "none"
      },
      "fieldConfig": {
        "defaults": {
          "unit": "percentunit",
          "min": 0,
          "max": 1
        }
      }
    },
    {
      "type": "timeseries",
      "title": "Latency (ms) p50/p95/p99",
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 4
      },
      "targets": [
        {
          "expr": "histogram_quantile(0.5, sum(rate(istio_request_duration_milliseconds_bucket{reporter=\"destination\"}[5m])) by (le))",
          "legendFormat": "p50"
        },
        {
          "expr": "histogram_quantile(0.95, sum(rate(istio_request_duration_milliseconds_bucket{reporter=\"destination\"}[5m])) by (le))",
          "legendFormat": "p95"
        },
        {
          "expr": "histogram_quantile(0.99, sum(rate(istio_request_duration_milliseconds_bucket{reporter=\"destination\"}[5m])) by (le))",
          "legendFormat": "p99"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "ms"
        }
      }
    },
    {
      "type": "timeseries",
      "title": "RPS by Workload",
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 12
      },
      "targets": [
        {
          "expr": "sum by (destination_workload) (rate(istio_requests_total{reporter=\"destination\"}[5m]))",
          "legendFormat": "{{destination_workload}}"
        }
      ]
    }
  ],
  "templating": {
    "list": []
  }
}
|
||||
80
files/grafana/dashboards/service-sli.json
Normal file
80
files/grafana/dashboards/service-sli.json
Normal file
@@ -0,0 +1,80 @@
|
||||
{
  "id": null,
  "uid": "service-sli",
  "title": "Service • SLI",
  "schemaVersion": 36,
  "version": 1,
  "timezone": "browser",
  "tags": ["istio", "sre", "sli"],
  "templating": {
    "list": [
      {
        "name": "DS_PROMETHEUS",
        "label": "Datasource",
        "type": "datasource",
        "query": "prometheus"
      },
      {
        "name": "namespace",
        "type": "query",
        "datasource": "${DS_PROMETHEUS}",
        "query": "label_values(istio_requests_total, destination_namespace)",
        "refresh": 1
      },
      {
        "name": "workload",
        "type": "query",
        "datasource": "${DS_PROMETHEUS}",
        "query": "label_values(istio_requests_total{destination_namespace=\"$namespace\"}, destination_workload)",
        "refresh": 1
      }
    ]
  },
  "panels": [
    {
      "type": "stat",
      "title": "Success Rate",
      "gridPos": {"h": 4, "w": 6, "x": 0, "y": 0},
      "targets": [
        {
          "expr": "sum(rate(istio_requests_total{reporter=\"destination\",destination_namespace=\"$namespace\",destination_workload=\"$workload\",response_code!~\"5..\"}[5m])) / sum(rate(istio_requests_total{reporter=\"destination\",destination_namespace=\"$namespace\",destination_workload=\"$workload\"}[5m]))",
          "legendFormat": "success"
        }
      ],
      "fieldConfig": {"defaults": {"unit": "percentunit", "min": 0, "max": 1}}
    },
    {
      "type": "timeseries",
      "title": "Latency (ms) p50/p95/p99",
      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 4},
      "targets": [
        {
          "expr": "histogram_quantile(0.5, sum by (le) (rate(istio_request_duration_milliseconds_bucket{reporter=\"destination\",destination_namespace=\"$namespace\",destination_workload=\"$workload\"}[5m])))",
          "legendFormat": "p50"
        },
        {
          "expr": "histogram_quantile(0.95, sum by (le) (rate(istio_request_duration_milliseconds_bucket{reporter=\"destination\",destination_namespace=\"$namespace\",destination_workload=\"$workload\"}[5m])))",
          "legendFormat": "p95"
        },
        {
          "expr": "histogram_quantile(0.99, sum by (le) (rate(istio_request_duration_milliseconds_bucket{reporter=\"destination\",destination_namespace=\"$namespace\",destination_workload=\"$workload\"}[5m])))",
          "legendFormat": "p99"
        }
      ],
      "fieldConfig": {"defaults": {"unit": "ms"}}
    },
    {
      "type": "timeseries",
      "title": "RPS (2xx/4xx/5xx)",
      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 12},
      "targets": [
        {
          "expr": "sum(rate(istio_requests_total{reporter=\"destination\",destination_namespace=\"$namespace\",destination_workload=\"$workload\",response_code=~\"2..\"}[5m]))",
          "legendFormat": "2xx"
        },
        {
          "expr": "sum(rate(istio_requests_total{reporter=\"destination\",destination_namespace=\"$namespace\",destination_workload=\"$workload\",response_code=~\"4..\"}[5m]))",
          "legendFormat": "4xx"
        },
        {
          "expr": "sum(rate(istio_requests_total{reporter=\"destination\",destination_namespace=\"$namespace\",destination_workload=\"$workload\",response_code=~\"5..\"}[5m]))",
          "legendFormat": "5xx"
        }
      ]
    }
  ]
}
|
||||
39
files/k8s/istio/telemetry.yaml
Normal file
39
files/k8s/istio/telemetry.yaml
Normal file
@@ -0,0 +1,39 @@
|
||||
---
# Istio Telemetry: metrics configuration for the Prometheus provider.
# Author: Sergey Antropov
# Site: https://devops.org.ru
#
# NOTE: tagOverrides values are CEL expressions over proxy attributes
# (e.g. `request.host`), not Envoy access-log operators such as
# `%REQUEST_HOST%`.
# NOTE: destination_workload, destination_namespace and response_code are
# standard labels on Istio request metrics, so they are not overridden here.
# NOTE(review): the Telemetry API has no `histogram`/`buckets` field on
# metric overrides. The bucket list this file originally asked for was
# 1, 5, 10, 25, 50, 100, 250, 500, 1000, 2000, 5000 (ms); it has to be
# configured at proxy level instead — confirm the mechanism for the Istio
# version in use.
apiVersion: telemetry.istio.io/v1
kind: Telemetry
metadata:
  name: mesh-default
  namespace: istio-system
spec:
  selector: {}
  metrics:
    - providers:
        - name: prometheus
      overrides:
        - match:
            metric: REQUEST_DURATION
          tagOverrides:
            # Adds the request host as an extra label on the duration metric.
            request_host:
              operation: UPSERT
              value: request.host
|
||||
66
files/k8s/istio/trafficpolicy.yaml
Normal file
66
files/k8s/istio/trafficpolicy.yaml
Normal file
@@ -0,0 +1,66 @@
|
||||
---
# Istio traffic policy: traffic management settings.
# Author: Sergey Antropov
# Site: https://devops.org.ru

# Mesh-wide mutual TLS in STRICT mode.
apiVersion: security.istio.io/v1
kind: PeerAuthentication
metadata:
  name: default
  namespace: istio-system
spec:
  mtls:
    mode: STRICT
|
||||
|
||||
---
# Example of a strict policy for bookinfo (connection pool + outlier
# detection) applied to the productpage service.
apiVersion: networking.istio.io/v1
kind: DestinationRule
metadata:
  name: productpage-policy
  namespace: bookinfo
spec:
  host: productpage.bookinfo.svc.cluster.local
  trafficPolicy:
    tls:
      mode: ISTIO_MUTUAL
    connectionPool:
      tcp:
        maxConnections: 100
      http:
        http1MaxPendingRequests: 1000
        maxRequestsPerConnection: 100
    outlierDetection:
      # The API field is `consecutive5xxErrors`; `consecutive5xx` is not part
      # of the OutlierDetection schema.
      consecutive5xxErrors: 5
      interval: 5s
      baseEjectionTime: 30s
      maxEjectionPercent: 50
|
||||
|
||||
---
# Traffic policy for the bookinfo reviews service: version subsets v1/v2
# plus connection pool limits and outlier detection.
# NOTE(review): trafficPolicy is placed at spec level (shared by both
# subsets) — confirm against the intended nesting.
apiVersion: networking.istio.io/v1
kind: DestinationRule
metadata:
  name: reviews-policy
  namespace: bookinfo
spec:
  host: reviews.bookinfo.svc.cluster.local
  subsets:
    - name: v1
      labels:
        version: v1
    - name: v2
      labels:
        version: v2
  trafficPolicy:
    tls:
      mode: ISTIO_MUTUAL
    connectionPool:
      tcp:
        maxConnections: 100
      http:
        http1MaxPendingRequests: 1000
        maxRequestsPerConnection: 100
    outlierDetection:
      # The API field is `consecutive5xxErrors`; `consecutive5xx` is not part
      # of the OutlierDetection schema.
      consecutive5xxErrors: 3
      interval: 5s
      baseEjectionTime: 30s
      maxEjectionPercent: 50
|
||||
Reference in New Issue
Block a user