feat: добавить аддоны Loki, Promtail, Tempo, Pushgateway
Loki (grafana/loki 6.7.3): - Монолитный режим (singleBinary) — оптимально для малых кластеров - Хранение: filesystem по умолчанию, автоматически S3 если addon_minio=true (создаёт бакеты chunks/ruler/admin в MinIO через Job) - Retention 30 дней, отключён self-monitoring (без Grafana Agent Operator) - ServiceMonitor + Grafana datasource ConfigMap при addon_prometheus_stack=true - Datasource включает derivedFields → Tempo если addon_tempo=true Promtail (grafana/promtail 6.16.4): - DaemonSet на всех нодах (tolerations: Exists) - Отправляет логи в Loki по http://loki.loki.svc.cluster.local:3100 - ServiceMonitor при addon_prometheus_stack=true Tempo (grafana/tempo 1.10.3): - Монолитный режим с PVC для трейсов - Receivers: OTLP gRPC/HTTP, Jaeger, опционально Zipkin - Grafana datasource ConfigMap при addon_prometheus_stack=true (tracesToLogsV2 → Loki если addon_loki=true, serviceMap → Prometheus) - ServiceMonitor при addon_prometheus_stack=true Pushgateway (prometheus-community/prometheus-pushgateway 2.14.0): - Устанавливается в monitoring namespace (рядом с Prometheus) - ServiceMonitor при addon_prometheus_stack=true - Опциональные persistence и ingress
This commit is contained in:
18
Makefile
18
Makefile
@@ -54,6 +54,7 @@ DOCKER_RUN := docker run --rm -it \
|
||||
addon-argocd addon-longhorn addon-kubernetes-dashboard \
|
||||
addon-postgresql addon-mysql addon-databasus \
|
||||
addon-minio addon-velero addon-crowdsec \
|
||||
addon-loki addon-promtail addon-tempo addon-pushgateway \
|
||||
add-node remove-node \
|
||||
add-etcd-node remove-etcd-node \
|
||||
etcd-backup etcd-restore etcd-list-snapshots \
|
||||
@@ -333,6 +334,23 @@ addon-crowdsec: _check_env _check_image ## Установить CrowdSec — о
|
||||
@printf "$(CYAN)Устанавливаю CrowdSec...$(NC)\n"
|
||||
$(DOCKER_RUN) addon crowdsec $(ARGS)
|
||||
|
||||
# ── Observability (logging / tracing / metrics) ───────────────────────────────
|
||||
addon-loki: _check_env _check_image ## Установить Loki — агрегация логов (ARGS="-e loki_storage_type=s3" для MinIO)
|
||||
@printf "$(CYAN)Устанавливаю Loki...$(NC)\n"
|
||||
$(DOCKER_RUN) addon loki $(ARGS)
|
||||
|
||||
addon-promtail: _check_env _check_image ## Установить Promtail — агент сбора логов → Loki
|
||||
@printf "$(CYAN)Устанавливаю Promtail...$(NC)\n"
|
||||
$(DOCKER_RUN) addon promtail $(ARGS)
|
||||
|
||||
addon-tempo: _check_env _check_image ## Установить Tempo — distributed tracing (OTLP/Jaeger/Zipkin)
|
||||
@printf "$(CYAN)Устанавливаю Tempo...$(NC)\n"
|
||||
$(DOCKER_RUN) addon tempo $(ARGS)
|
||||
|
||||
addon-pushgateway: _check_env _check_image ## Установить Prometheus Pushgateway — метрики batch-задач и скриптов
|
||||
@printf "$(CYAN)Устанавливаю Pushgateway...$(NC)\n"
|
||||
$(DOCKER_RUN) addon pushgateway $(ARGS)
|
||||
|
||||
# Generic цель — любой аддон из addons/<name>/playbook.yml
|
||||
addon-%: _check_env _check_image
|
||||
@if [ ! -f "addons/$*/playbook.yml" ]; then \
|
||||
|
||||
7
addons/loki/playbook.yml
Normal file
7
addons/loki/playbook.yml
Normal file
@@ -0,0 +1,7 @@
|
||||
---
|
||||
- name: Install Loki
|
||||
hosts: k3s_master[0]
|
||||
gather_facts: false
|
||||
become: true
|
||||
roles:
|
||||
- role: "{{ playbook_dir }}/role"
|
||||
44
addons/loki/role/defaults/main.yml
Normal file
44
addons/loki/role/defaults/main.yml
Normal file
@@ -0,0 +1,44 @@
|
||||
---
|
||||
loki_version: "6.7.3"
|
||||
loki_namespace: "loki"
|
||||
loki_chart_repo: "https://grafana.github.io/helm-charts"
|
||||
|
||||
# Количество реплик (singleBinary — монолитный режим, оптимально для малых кластеров)
|
||||
loki_replicas: 1
|
||||
|
||||
# Тип хранилища: filesystem | s3
|
||||
# Автоматически s3 если addon_minio установлен
|
||||
loki_storage_type: "{{ 's3' if addon_minio | default(false) | bool else 'filesystem' }}"
|
||||
loki_storage_size: "10Gi"
|
||||
loki_storage_class: "" # "" = default StorageClass
|
||||
|
||||
# MinIO/S3 (используются если loki_storage_type == 's3')
|
||||
loki_s3_endpoint: "http://minio.minio.svc.cluster.local:9000"
|
||||
loki_s3_bucket_chunks: "loki-chunks"
|
||||
loki_s3_bucket_ruler: "loki-ruler"
|
||||
loki_s3_bucket_admin: "loki-admin"
|
||||
loki_s3_access_key: "{{ vault_minio_root_user | default('minioadmin') }}"
|
||||
loki_s3_secret_key: "{{ vault_minio_root_password | default('changeme-minio') }}"
|
||||
loki_s3_region: "us-east-1"
|
||||
|
||||
# Срок хранения логов
|
||||
loki_retention_period: "720h" # 30 дней
|
||||
|
||||
# Ingress (обычно не нужен — Grafana обращается напрямую внутри кластера)
|
||||
loki_ingress_enabled: false
|
||||
loki_ingress_host: "loki.local"
|
||||
loki_ingress_class: "{{ ingress_nginx_class_name | default('nginx') }}"
|
||||
loki_ingress_tls: false
|
||||
loki_ingress_cert_issuer: "{{ cert_manager_default_issuer_name | default('letsencrypt-prod') }}"
|
||||
|
||||
# Метрики
|
||||
loki_metrics_enabled: true
|
||||
# ServiceMonitor создаётся только когда addon_prometheus_stack: true
|
||||
|
||||
loki_resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
100
addons/loki/role/tasks/main.yml
Normal file
100
addons/loki/role/tasks/main.yml
Normal file
@@ -0,0 +1,100 @@
|
||||
---
|
||||
- name: Add Grafana Helm repo
|
||||
kubernetes.core.helm_repository:
|
||||
name: grafana
|
||||
repo_url: "{{ loki_chart_repo }}"
|
||||
environment:
|
||||
KUBECONFIG: "{{ k3s_kubeconfig_path }}"
|
||||
|
||||
- name: Create Loki namespace
  # A pipeline needs a real shell: the `command` module would pass the `|`
  # and everything after it to kubectl as literal arguments. Use `shell`.
  # The create-then-apply idiom makes namespace creation idempotent.
  ansible.builtin.shell: >
    k3s kubectl create namespace {{ loki_namespace }}
    --dry-run=client -o yaml | k3s kubectl apply -f -
  changed_when: false
|
||||
|
||||
- name: Create MinIO buckets for Loki
|
||||
kubernetes.core.k8s:
|
||||
state: present
|
||||
definition:
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: loki-create-minio-buckets
|
||||
namespace: "{{ loki_namespace }}"
|
||||
spec:
|
||||
ttlSecondsAfterFinished: 300
|
||||
template:
|
||||
spec:
|
||||
restartPolicy: OnFailure
|
||||
containers:
|
||||
- name: mc
|
||||
image: minio/mc:latest
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- |
|
||||
mc alias set minio {{ loki_s3_endpoint }} {{ loki_s3_access_key }} {{ loki_s3_secret_key }}
|
||||
mc mb --ignore-existing minio/{{ loki_s3_bucket_chunks }}
|
||||
mc mb --ignore-existing minio/{{ loki_s3_bucket_ruler }}
|
||||
mc mb --ignore-existing minio/{{ loki_s3_bucket_admin }}
|
||||
environment:
|
||||
KUBECONFIG: "{{ k3s_kubeconfig_path }}"
|
||||
when: loki_storage_type == 's3'
|
||||
|
||||
- name: Wait for MinIO bucket Job to complete
|
||||
ansible.builtin.command: >
|
||||
k3s kubectl -n {{ loki_namespace }}
|
||||
wait job/loki-create-minio-buckets
|
||||
--for=condition=complete --timeout=120s
|
||||
changed_when: false
|
||||
when: loki_storage_type == 's3'
|
||||
|
||||
- name: Template Loki values
|
||||
ansible.builtin.template:
|
||||
src: loki-values.yaml.j2
|
||||
dest: /tmp/loki-values.yaml
|
||||
mode: '0644'
|
||||
|
||||
- name: Install Loki via Helm
|
||||
kubernetes.core.helm:
|
||||
name: loki
|
||||
chart_ref: grafana/loki
|
||||
chart_version: "{{ loki_version }}"
|
||||
release_namespace: "{{ loki_namespace }}"
|
||||
create_namespace: true
|
||||
wait: true
|
||||
timeout: "10m0s"
|
||||
values_files:
|
||||
- /tmp/loki-values.yaml
|
||||
environment:
|
||||
KUBECONFIG: "{{ k3s_kubeconfig_path }}"
|
||||
|
||||
- name: Wait for Loki to be ready
|
||||
ansible.builtin.command: >
|
||||
k3s kubectl -n {{ loki_namespace }}
|
||||
rollout status statefulset/loki --timeout=180s
|
||||
changed_when: false
|
||||
retries: 3
|
||||
delay: 10
|
||||
|
||||
- name: Template Loki Grafana datasource ConfigMap
|
||||
ansible.builtin.template:
|
||||
src: loki-grafana-datasource.yaml.j2
|
||||
dest: /tmp/loki-grafana-datasource.yaml
|
||||
mode: '0644'
|
||||
when: addon_prometheus_stack | default(false) | bool
|
||||
|
||||
- name: Apply Loki Grafana datasource ConfigMap
|
||||
ansible.builtin.command: k3s kubectl apply -f /tmp/loki-grafana-datasource.yaml
|
||||
changed_when: true
|
||||
when: addon_prometheus_stack | default(false) | bool
|
||||
|
||||
- name: Show Loki access info
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
- "Loki установлен в namespace: {{ loki_namespace }}"
|
||||
- "Push URL (для Promtail): http://loki.{{ loki_namespace }}.svc.cluster.local:3100/loki/api/v1/push"
|
||||
- "Query URL: http://loki.{{ loki_namespace }}.svc.cluster.local:3100"
|
||||
- "Хранилище: {{ loki_storage_type }}, retention: {{ loki_retention_period }}"
|
||||
- "{% if addon_prometheus_stack | default(false) | bool %}Datasource 'Loki' добавлен в Grafana автоматически{% else %}Добавь datasource в Grafana: тип Loki, URL http://loki.{{ loki_namespace }}.svc.cluster.local:3100{% endif %}"
|
||||
- "{% if addon_promtail | default(false) | bool %}Promtail уже установлен — логи собираются{% else %}Установи Promtail: make addon-promtail{% endif %}"
|
||||
26
addons/loki/role/templates/loki-grafana-datasource.yaml.j2
Normal file
26
addons/loki/role/templates/loki-grafana-datasource.yaml.j2
Normal file
@@ -0,0 +1,26 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: loki-grafana-datasource
|
||||
namespace: {{ prometheus_stack_namespace | default('monitoring') }}
|
||||
labels:
|
||||
grafana_datasource: "1"
|
||||
data:
|
||||
loki-datasource.yaml: |
|
||||
apiVersion: 1
|
||||
datasources:
|
||||
- name: Loki
|
||||
type: loki
|
||||
uid: loki
|
||||
access: proxy
|
||||
url: http://loki.{{ loki_namespace }}.svc.cluster.local:3100
|
||||
isDefault: false
|
||||
jsonData:
|
||||
maxLines: 1000
|
||||
{% if addon_tempo | default(false) | bool %}
|
||||
derivedFields:
|
||||
- datasourceUid: tempo
|
||||
matcherRegex: "traceID=(\\w+)"
|
||||
name: TraceID
|
||||
url: '${__value.raw}'
|
||||
{% endif %}
|
||||
120
addons/loki/role/templates/loki-values.yaml.j2
Normal file
120
addons/loki/role/templates/loki-values.yaml.j2
Normal file
@@ -0,0 +1,120 @@
|
||||
## Loki Helm values — Ansible managed
|
||||
|
||||
deploymentMode: SingleBinary
|
||||
|
||||
loki:
|
||||
auth_enabled: false
|
||||
|
||||
commonConfig:
|
||||
replication_factor: 1
|
||||
|
||||
storage:
|
||||
{% if loki_storage_type == 's3' %}
|
||||
type: s3
|
||||
s3:
|
||||
endpoint: "{{ loki_s3_endpoint }}"
|
||||
region: "{{ loki_s3_region }}"
|
||||
secretAccessKey: "{{ loki_s3_secret_key }}"
|
||||
accessKeyId: "{{ loki_s3_access_key }}"
|
||||
s3ForcePathStyle: true
|
||||
insecure: true
|
||||
bucketNames:
|
||||
chunks: "{{ loki_s3_bucket_chunks }}"
|
||||
ruler: "{{ loki_s3_bucket_ruler }}"
|
||||
admin: "{{ loki_s3_bucket_admin }}"
|
||||
{% else %}
|
||||
type: filesystem
|
||||
{% endif %}
|
||||
|
||||
schemaConfig:
|
||||
configs:
|
||||
- from: "2024-04-01"
|
||||
store: tsdb
|
||||
object_store: "{{ loki_storage_type == 's3' | ternary('s3', 'filesystem') }}"
|
||||
schema: v13
|
||||
index:
|
||||
prefix: index_
|
||||
period: 24h
|
||||
|
||||
limits_config:
|
||||
retention_period: "{{ loki_retention_period }}"
|
||||
|
||||
compactor:
|
||||
retention_enabled: true
|
||||
|
||||
singleBinary:
|
||||
replicas: {{ loki_replicas }}
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: "{{ loki_resources.requests.cpu }}"
|
||||
memory: "{{ loki_resources.requests.memory }}"
|
||||
limits:
|
||||
cpu: "{{ loki_resources.limits.cpu }}"
|
||||
memory: "{{ loki_resources.limits.memory }}"
|
||||
|
||||
persistence:
|
||||
enabled: {{ (loki_storage_type != 's3') | lower }}
|
||||
size: "{{ loki_storage_size }}"
|
||||
{% if loki_storage_class %}
|
||||
storageClass: "{{ loki_storage_class }}"
|
||||
{% endif %}
|
||||
|
||||
tolerations:
|
||||
- key: "node-role.kubernetes.io/control-plane"
|
||||
operator: "Exists"
|
||||
effect: "NoSchedule"
|
||||
|
||||
# Отключить масштабируемые компоненты (используется singleBinary)
|
||||
read:
|
||||
replicas: 0
|
||||
write:
|
||||
replicas: 0
|
||||
backend:
|
||||
replicas: 0
|
||||
|
||||
# Кэши отключены для простоты (singleBinary не требует)
|
||||
chunksCache:
|
||||
enabled: false
|
||||
resultsCache:
|
||||
enabled: false
|
||||
|
||||
# Отключить self-monitoring (требует Grafana Agent Operator)
|
||||
monitoring:
|
||||
selfMonitoring:
|
||||
enabled: false
|
||||
grafanaAgent:
|
||||
installOperator: false
|
||||
lokiCanary:
|
||||
enabled: false
|
||||
serviceMonitor:
|
||||
enabled: {{ loki_metrics_enabled | lower }}
|
||||
additionalLabels:
|
||||
release: kube-prometheus-stack
|
||||
|
||||
# Тесты создают лишние поды — отключаем
|
||||
test:
|
||||
enabled: false
|
||||
|
||||
# Gateway (nginx) отключён — прямое обращение к сервису
|
||||
gateway:
|
||||
enabled: false
|
||||
|
||||
{% if loki_ingress_enabled %}
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: "{{ loki_ingress_class }}"
|
||||
hosts:
|
||||
- host: "{{ loki_ingress_host }}"
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
{% if loki_ingress_tls %}
|
||||
tls:
|
||||
- secretName: loki-tls
|
||||
hosts:
|
||||
- "{{ loki_ingress_host }}"
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: "{{ loki_ingress_cert_issuer }}"
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
7
addons/promtail/playbook.yml
Normal file
7
addons/promtail/playbook.yml
Normal file
@@ -0,0 +1,7 @@
|
||||
---
|
||||
- name: Install Promtail
|
||||
hosts: k3s_master[0]
|
||||
gather_facts: false
|
||||
become: true
|
||||
roles:
|
||||
- role: "{{ playbook_dir }}/role"
|
||||
22
addons/promtail/role/defaults/main.yml
Normal file
22
addons/promtail/role/defaults/main.yml
Normal file
@@ -0,0 +1,22 @@
|
||||
---
|
||||
promtail_version: "6.16.4"
|
||||
promtail_namespace: "promtail"
|
||||
promtail_chart_repo: "https://grafana.github.io/helm-charts"
|
||||
|
||||
# URL Loki push endpoint
|
||||
promtail_loki_url: "http://loki.{{ loki_namespace | default('loki') }}.svc.cluster.local:3100/loki/api/v1/push"
|
||||
|
||||
# Tenant ID (оставь пустым если Loki auth_enabled: false)
|
||||
promtail_tenant_id: ""
|
||||
|
||||
# Метрики
|
||||
promtail_metrics_enabled: true
|
||||
# ServiceMonitor создаётся только когда addon_prometheus_stack: true
|
||||
|
||||
promtail_resources:
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 64Mi
|
||||
limits:
|
||||
cpu: 200m
|
||||
memory: 128Mi
|
||||
61
addons/promtail/role/tasks/main.yml
Normal file
61
addons/promtail/role/tasks/main.yml
Normal file
@@ -0,0 +1,61 @@
|
||||
---
|
||||
- name: Add Grafana Helm repo
|
||||
kubernetes.core.helm_repository:
|
||||
name: grafana
|
||||
repo_url: "{{ promtail_chart_repo }}"
|
||||
environment:
|
||||
KUBECONFIG: "{{ k3s_kubeconfig_path }}"
|
||||
|
||||
- name: Install Promtail via Helm
|
||||
kubernetes.core.helm:
|
||||
name: promtail
|
||||
chart_ref: grafana/promtail
|
||||
chart_version: "{{ promtail_version }}"
|
||||
release_namespace: "{{ promtail_namespace }}"
|
||||
create_namespace: true
|
||||
wait: true
|
||||
timeout: "5m0s"
|
||||
values:
|
||||
config:
|
||||
clients:
|
||||
- url: "{{ promtail_loki_url }}"
|
||||
{% if promtail_tenant_id %}
|
||||
tenantID: "{{ promtail_tenant_id }}"
|
||||
{% endif %}
|
||||
snippets:
|
||||
pipelineStages:
|
||||
- cri: {}
|
||||
|
||||
tolerations:
|
||||
- operator: Exists # DaemonSet на всех нодах включая мастера
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: "{{ promtail_resources.requests.cpu }}"
|
||||
memory: "{{ promtail_resources.requests.memory }}"
|
||||
limits:
|
||||
cpu: "{{ promtail_resources.limits.cpu }}"
|
||||
memory: "{{ promtail_resources.limits.memory }}"
|
||||
|
||||
serviceMonitor:
|
||||
enabled: "{{ promtail_metrics_enabled | bool and addon_prometheus_stack | default(false) | bool }}"
|
||||
labels:
|
||||
release: kube-prometheus-stack
|
||||
environment:
|
||||
KUBECONFIG: "{{ k3s_kubeconfig_path }}"
|
||||
|
||||
- name: Wait for Promtail DaemonSet to be ready
|
||||
ansible.builtin.command: >
|
||||
k3s kubectl -n {{ promtail_namespace }}
|
||||
rollout status daemonset/promtail --timeout=120s
|
||||
changed_when: false
|
||||
retries: 3
|
||||
delay: 10
|
||||
|
||||
- name: Show Promtail access info
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
- "Promtail установлен в namespace: {{ promtail_namespace }}"
|
||||
- "Отправляет логи в: {{ promtail_loki_url }}"
|
||||
- "DaemonSet собирает логи со всех нод кластера"
|
||||
- "Логи доступны в Grafana → Explore → datasource Loki"
|
||||
7
addons/pushgateway/playbook.yml
Normal file
7
addons/pushgateway/playbook.yml
Normal file
@@ -0,0 +1,7 @@
|
||||
---
|
||||
- name: Install Prometheus Pushgateway
|
||||
hosts: k3s_master[0]
|
||||
gather_facts: false
|
||||
become: true
|
||||
roles:
|
||||
- role: "{{ playbook_dir }}/role"
|
||||
29
addons/pushgateway/role/defaults/main.yml
Normal file
29
addons/pushgateway/role/defaults/main.yml
Normal file
@@ -0,0 +1,29 @@
|
||||
---
|
||||
pushgateway_version: "2.14.0"
|
||||
# Устанавливается в namespace prometheus-stack для близости к Prometheus
|
||||
pushgateway_namespace: "{{ prometheus_stack_namespace | default('monitoring') }}"
|
||||
pushgateway_chart_repo: "https://prometheus-community.github.io/helm-charts"
|
||||
|
||||
# Persistence — сохраняет метрики между рестартами пода
|
||||
pushgateway_persistence_enabled: false
|
||||
pushgateway_persistence_size: "2Gi"
|
||||
pushgateway_persistence_class: "" # "" = default StorageClass
|
||||
|
||||
# Ingress
|
||||
pushgateway_ingress_enabled: false
|
||||
pushgateway_ingress_host: "pushgateway.local"
|
||||
pushgateway_ingress_class: "{{ ingress_nginx_class_name | default('nginx') }}"
|
||||
pushgateway_ingress_tls: false
|
||||
pushgateway_ingress_cert_issuer: "{{ cert_manager_default_issuer_name | default('letsencrypt-prod') }}"
|
||||
|
||||
# Метрики
|
||||
pushgateway_metrics_enabled: true
|
||||
# ServiceMonitor создаётся только когда addon_prometheus_stack: true
|
||||
|
||||
pushgateway_resources:
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 32Mi
|
||||
limits:
|
||||
cpu: 100m
|
||||
memory: 64Mi
|
||||
74
addons/pushgateway/role/tasks/main.yml
Normal file
74
addons/pushgateway/role/tasks/main.yml
Normal file
@@ -0,0 +1,74 @@
|
||||
---
|
||||
- name: Add prometheus-community Helm repo
|
||||
kubernetes.core.helm_repository:
|
||||
name: prometheus-community
|
||||
repo_url: "{{ pushgateway_chart_repo }}"
|
||||
environment:
|
||||
KUBECONFIG: "{{ k3s_kubeconfig_path }}"
|
||||
|
||||
- name: Install Prometheus Pushgateway via Helm
|
||||
kubernetes.core.helm:
|
||||
name: pushgateway
|
||||
chart_ref: prometheus-community/prometheus-pushgateway
|
||||
chart_version: "{{ pushgateway_version }}"
|
||||
release_namespace: "{{ pushgateway_namespace }}"
|
||||
create_namespace: true
|
||||
wait: true
|
||||
timeout: "5m0s"
|
||||
values:
|
||||
tolerations:
|
||||
- key: "node-role.kubernetes.io/control-plane"
|
||||
operator: "Exists"
|
||||
effect: "NoSchedule"
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: "{{ pushgateway_resources.requests.cpu }}"
|
||||
memory: "{{ pushgateway_resources.requests.memory }}"
|
||||
limits:
|
||||
cpu: "{{ pushgateway_resources.limits.cpu }}"
|
||||
memory: "{{ pushgateway_resources.limits.memory }}"
|
||||
|
||||
persistentVolume:
|
||||
enabled: "{{ pushgateway_persistence_enabled | bool }}"
|
||||
size: "{{ pushgateway_persistence_size }}"
|
||||
{% if pushgateway_persistence_class %}
|
||||
storageClass: "{{ pushgateway_persistence_class }}"
|
||||
{% endif %}
|
||||
|
||||
serviceMonitor:
|
||||
enabled: "{{ pushgateway_metrics_enabled | bool and addon_prometheus_stack | default(false) | bool }}"
|
||||
namespace: "{{ pushgateway_namespace }}"
|
||||
additionalLabels:
|
||||
release: kube-prometheus-stack
|
||||
|
||||
ingress:
|
||||
enabled: "{{ pushgateway_ingress_enabled | bool }}"
|
||||
{% if pushgateway_ingress_enabled %}
|
||||
ingressClassName: "{{ pushgateway_ingress_class }}"
|
||||
hosts:
|
||||
- host: "{{ pushgateway_ingress_host }}"
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
{% if pushgateway_ingress_tls %}
|
||||
tls:
|
||||
- secretName: pushgateway-tls
|
||||
hosts:
|
||||
- "{{ pushgateway_ingress_host }}"
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: "{{ pushgateway_ingress_cert_issuer }}"
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
environment:
|
||||
KUBECONFIG: "{{ k3s_kubeconfig_path }}"
|
||||
|
||||
- name: Show Pushgateway access info
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
- "Pushgateway установлен в namespace: {{ pushgateway_namespace }}"
|
||||
- "URL внутри кластера: http://pushgateway-prometheus-pushgateway.{{ pushgateway_namespace }}.svc.cluster.local:9091"
|
||||
- "Пример отправки метрики:"
|
||||
- " echo 'job_duration_seconds 42' | curl --data-binary @- http://pushgateway-prometheus-pushgateway.{{ pushgateway_namespace }}.svc.cluster.local:9091/metrics/job/my_job"
|
||||
- "{% if pushgateway_ingress_enabled %}UI: http{{ 's' if pushgateway_ingress_tls else '' }}://{{ pushgateway_ingress_host }}{% else %}Port-forward: kubectl port-forward svc/pushgateway-prometheus-pushgateway -n {{ pushgateway_namespace }} 9091:9091{% endif %}"
|
||||
- "{% if addon_prometheus_stack | default(false) | bool %}Prometheus автоматически скрейпит Pushgateway через ServiceMonitor{% endif %}"
|
||||
7
addons/tempo/playbook.yml
Normal file
7
addons/tempo/playbook.yml
Normal file
@@ -0,0 +1,7 @@
|
||||
---
|
||||
- name: Install Tempo
|
||||
hosts: k3s_master[0]
|
||||
gather_facts: false
|
||||
become: true
|
||||
roles:
|
||||
- role: "{{ playbook_dir }}/role"
|
||||
31
addons/tempo/role/defaults/main.yml
Normal file
31
addons/tempo/role/defaults/main.yml
Normal file
@@ -0,0 +1,31 @@
|
||||
---
|
||||
tempo_version: "1.10.3"
|
||||
tempo_namespace: "tempo"
|
||||
tempo_chart_repo: "https://grafana.github.io/helm-charts"
|
||||
|
||||
# Срок хранения трейсов
|
||||
tempo_retention_period: "720h" # 30 дней
|
||||
|
||||
# PVC для данных Tempo
|
||||
tempo_storage_size: "10Gi"
|
||||
tempo_storage_class: "" # "" = default StorageClass
|
||||
|
||||
# Receivers (протоколы приёма трейсов)
|
||||
# OTLP gRPC:4317, OTLP HTTP:4318
|
||||
# Jaeger gRPC:14250, Jaeger HTTP:14268
|
||||
# Zipkin:9411
|
||||
tempo_receivers_otlp_enabled: true
|
||||
tempo_receivers_jaeger_enabled: true
|
||||
tempo_receivers_zipkin_enabled: false
|
||||
|
||||
# Метрики
|
||||
tempo_metrics_enabled: true
|
||||
# ServiceMonitor создаётся только когда addon_prometheus_stack: true
|
||||
|
||||
tempo_resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
102
addons/tempo/role/tasks/main.yml
Normal file
102
addons/tempo/role/tasks/main.yml
Normal file
@@ -0,0 +1,102 @@
|
||||
---
|
||||
- name: Add Grafana Helm repo
|
||||
kubernetes.core.helm_repository:
|
||||
name: grafana
|
||||
repo_url: "{{ tempo_chart_repo }}"
|
||||
environment:
|
||||
KUBECONFIG: "{{ k3s_kubeconfig_path }}"
|
||||
|
||||
- name: Install Tempo via Helm
|
||||
kubernetes.core.helm:
|
||||
name: tempo
|
||||
chart_ref: grafana/tempo
|
||||
chart_version: "{{ tempo_version }}"
|
||||
release_namespace: "{{ tempo_namespace }}"
|
||||
create_namespace: true
|
||||
wait: true
|
||||
timeout: "5m0s"
|
||||
values:
|
||||
tempo:
|
||||
retention: "{{ tempo_retention_period }}"
|
||||
|
||||
receivers:
|
||||
{% if tempo_receivers_otlp_enabled %}
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: "0.0.0.0:4317"
|
||||
http:
|
||||
endpoint: "0.0.0.0:4318"
|
||||
{% endif %}
|
||||
{% if tempo_receivers_jaeger_enabled %}
|
||||
jaeger:
|
||||
protocols:
|
||||
thrift_http:
|
||||
endpoint: "0.0.0.0:14268"
|
||||
grpc:
|
||||
endpoint: "0.0.0.0:14250"
|
||||
{% endif %}
|
||||
{% if tempo_receivers_zipkin_enabled %}
|
||||
zipkin:
|
||||
endpoint: "0.0.0.0:9411"
|
||||
{% endif %}
|
||||
|
||||
persistence:
|
||||
enabled: true
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
size: "{{ tempo_storage_size }}"
|
||||
{% if tempo_storage_class %}
|
||||
storageClassName: "{{ tempo_storage_class }}"
|
||||
{% endif %}
|
||||
|
||||
tolerations:
|
||||
- key: "node-role.kubernetes.io/control-plane"
|
||||
operator: "Exists"
|
||||
effect: "NoSchedule"
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: "{{ tempo_resources.requests.cpu }}"
|
||||
memory: "{{ tempo_resources.requests.memory }}"
|
||||
limits:
|
||||
cpu: "{{ tempo_resources.limits.cpu }}"
|
||||
memory: "{{ tempo_resources.limits.memory }}"
|
||||
|
||||
serviceMonitor:
|
||||
enabled: "{{ tempo_metrics_enabled | bool and addon_prometheus_stack | default(false) | bool }}"
|
||||
additionalLabels:
|
||||
release: kube-prometheus-stack
|
||||
environment:
|
||||
KUBECONFIG: "{{ k3s_kubeconfig_path }}"
|
||||
|
||||
- name: Wait for Tempo to be ready
|
||||
ansible.builtin.command: >
|
||||
k3s kubectl -n {{ tempo_namespace }}
|
||||
rollout status deployment/tempo --timeout=120s
|
||||
changed_when: false
|
||||
retries: 3
|
||||
delay: 10
|
||||
|
||||
- name: Template Tempo Grafana datasource ConfigMap
|
||||
ansible.builtin.template:
|
||||
src: tempo-grafana-datasource.yaml.j2
|
||||
dest: /tmp/tempo-grafana-datasource.yaml
|
||||
mode: '0644'
|
||||
when: addon_prometheus_stack | default(false) | bool
|
||||
|
||||
- name: Apply Tempo Grafana datasource ConfigMap
|
||||
ansible.builtin.command: k3s kubectl apply -f /tmp/tempo-grafana-datasource.yaml
|
||||
changed_when: true
|
||||
when: addon_prometheus_stack | default(false) | bool
|
||||
|
||||
- name: Show Tempo access info
|
||||
ansible.builtin.debug:
|
||||
msg:
|
||||
- "Tempo установлен в namespace: {{ tempo_namespace }}"
|
||||
- "HTTP API (query): http://tempo.{{ tempo_namespace }}.svc.cluster.local:3200"
|
||||
- "{% if tempo_receivers_otlp_enabled %}OTLP gRPC: tempo.{{ tempo_namespace }}.svc.cluster.local:4317{% endif %}"
|
||||
- "{% if tempo_receivers_otlp_enabled %}OTLP HTTP: tempo.{{ tempo_namespace }}.svc.cluster.local:4318{% endif %}"
|
||||
- "{% if tempo_receivers_jaeger_enabled %}Jaeger HTTP: tempo.{{ tempo_namespace }}.svc.cluster.local:14268{% endif %}"
|
||||
- "{% if addon_prometheus_stack | default(false) | bool %}Datasource 'Tempo' добавлен в Grafana автоматически{% else %}Добавь datasource в Grafana: тип Tempo, URL http://tempo.{{ tempo_namespace }}.svc.cluster.local:3200{% endif %}"
|
||||
- "Отправляй трейсы из приложений через OTLP на tempo.{{ tempo_namespace }}.svc.cluster.local:4317"
|
||||
33
addons/tempo/role/templates/tempo-grafana-datasource.yaml.j2
Normal file
33
addons/tempo/role/templates/tempo-grafana-datasource.yaml.j2
Normal file
@@ -0,0 +1,33 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: tempo-grafana-datasource
|
||||
namespace: {{ prometheus_stack_namespace | default('monitoring') }}
|
||||
labels:
|
||||
grafana_datasource: "1"
|
||||
data:
|
||||
tempo-datasource.yaml: |
|
||||
apiVersion: 1
|
||||
datasources:
|
||||
- name: Tempo
|
||||
type: tempo
|
||||
uid: tempo
|
||||
access: proxy
|
||||
url: http://tempo.{{ tempo_namespace }}.svc.cluster.local:3200
|
||||
isDefault: false
|
||||
jsonData:
|
||||
httpMethod: GET
|
||||
nodeGraph:
|
||||
enabled: true
|
||||
search:
|
||||
hide: false
|
||||
serviceMap:
|
||||
datasourceUid: prometheus
|
||||
{% if addon_loki | default(false) | bool %}
|
||||
tracesToLogsV2:
|
||||
datasourceUid: loki
|
||||
spanStartTimeShift: '-1h'
|
||||
spanEndTimeShift: '1h'
|
||||
filterByTraceID: true
|
||||
filterBySpanID: false
|
||||
{% endif %}
|
||||
@@ -21,6 +21,10 @@ addon_databasus: false # Databasus (резервное копиро
|
||||
addon_minio: false # MinIO (S3-совместимое объектное хранилище)
|
||||
addon_velero: false # Velero (резервное копирование кластера и PVC)
|
||||
addon_crowdsec: false # CrowdSec (обнаружение вторжений)
|
||||
addon_loki: false # Loki (агрегация логов)
|
||||
addon_promtail: false # Promtail (агент сбора логов → Loki)
|
||||
addon_tempo: false # Tempo (distributed tracing)
|
||||
addon_pushgateway: false # Prometheus Pushgateway (метрики batch-задач)
|
||||
|
||||
# ─── NFS Server ───────────────────────────────────────────────────────────────
|
||||
nfs_exports:
|
||||
@@ -177,6 +181,31 @@ minio_api_ingress_host: "s3.example.com"
|
||||
# crowdsec_collections: "crowdsecurity/linux crowdsecurity/nginx crowdsecurity/kubernetes"
|
||||
# crowdsec_nginx_bouncer_enabled: false
|
||||
|
||||
# ─── Loki ─────────────────────────────────────────────────────────────────────
|
||||
# loki_storage_type: "filesystem" # filesystem (авто: s3 если addon_minio: true)
|
||||
# loki_storage_size: "10Gi"
|
||||
# loki_retention_period: "720h" # 30 дней
|
||||
# loki_ingress_enabled: false
|
||||
# loki_ingress_host: "loki.example.com"
|
||||
|
||||
# ─── Promtail ─────────────────────────────────────────────────────────────────
|
||||
# DaemonSet на всех нодах. Требует addon_loki: true (или внешний Loki).
|
||||
# promtail_loki_url: "http://loki.loki.svc.cluster.local:3100/loki/api/v1/push"
|
||||
|
||||
# ─── Tempo ────────────────────────────────────────────────────────────────────
|
||||
# Distributed tracing backend.
|
||||
# tempo_storage_size: "10Gi"
|
||||
# tempo_retention_period: "720h"
|
||||
# tempo_receivers_otlp_enabled: true # порты 4317 (gRPC) и 4318 (HTTP)
|
||||
# tempo_receivers_jaeger_enabled: true # порты 14268 (HTTP) и 14250 (gRPC)
|
||||
# tempo_receivers_zipkin_enabled: false # порт 9411
|
||||
|
||||
# ─── Pushgateway ──────────────────────────────────────────────────────────────
|
||||
# Для push-метрик из batch-задач, CI/CD, скриптов.
|
||||
# pushgateway_ingress_enabled: false
|
||||
# pushgateway_ingress_host: "pushgateway.example.com"
|
||||
# pushgateway_persistence_enabled: false # сохранять метрики между рестартами
|
||||
|
||||
# ─── etcd backup ──────────────────────────────────────────────────────────────
|
||||
etcd_backup_dir: "{{ k3s_data_dir }}/server/db/snapshots"
|
||||
etcd_backup_retention: 5 # сколько снимков хранить
|
||||
|
||||
Reference in New Issue
Block a user