Добавил настройку автоматического рестарта кластера по patroni api

This commit is contained in:
Sergey Antropoff 2025-04-03 17:38:19 +03:00
parent 8242af3182
commit d325e6019a
7 changed files with 293 additions and 100 deletions

View File

@ -10,6 +10,7 @@
- Проверка состояния кластера после изменений
- Уведомление о необходимости перезагрузки нод (если требуется)
- Автоматическое управление историей конфигурационных файлов
- Автоматическая перезагрузка кластера (если требуется) — включается переменной autorestart
### Требования
- Ansible 2.9+
@ -22,7 +23,9 @@
- config_dir (по умолчанию: "/ansible/history") - директория для хранения истории конфигураций
- config_file (по умолчанию: "/ansible/patroni_config.yaml") - путь к файлу с изменениями конфигурации
- patroni_host (по умолчанию: "10.14.0.180") - хост кластера Patroni
- patroni_host (по умолчанию: "127.0.0.1") - хост кластера Patroni
- patroni_api_port (по умолчанию: "8008") - порт кластера Patroni
- autorestart (по умолчанию: "false") - включение автоматического рестарта кластера, если применённые изменения настроек требуют перезагрузки
### Как внести изменения в конфиг кластера?
1. Для начала создайте новый branch по имени кластера.

View File

@ -1,5 +1,82 @@
#loop_wait: 10
#master_start_timeout: 300
#maximum_lag_on_failover: 1048576
postgresql:
parameters:
max_connections: 300
shared_buffers: "12GB"
use_pg_rewind: true
parameters:
# archive_command: pgbackrest --stanza=sandbox-postgres-cluster archive-push
# %p || cd .
# archive_mode: true
# archive_timeout: 1800s
# auto_explain.log_analyze: true
# auto_explain.log_buffers: true
# auto_explain.log_min_duration: 10s
# auto_explain.log_nested_statements: true
# auto_explain.log_timing: false
# auto_explain.log_triggers: true
# auto_explain.log_verbose: true
# autovacuum: true
# autovacuum_analyze_scale_factor: 0.02
# autovacuum_max_workers: 8
# autovacuum_naptime: 1s
# autovacuum_vacuum_cost_delay: 20
# autovacuum_vacuum_cost_limit: 200
# autovacuum_vacuum_scale_factor: 0.01
# checkpoint_completion_target: 0.9
# checkpoint_timeout: 15min
# default_statistics_target: 100
# effective_cache_size: 12GB
# effective_io_concurrency: 200
# hot_standby: true
# huge_pages: try
# log_checkpoints: true
# log_directory: /var/log/postgresql
# log_filename: postgresql-%a.log
# log_line_prefix: '%t [%p-%l] %r %q%u@%d '
# log_lock_waits: true
# log_rotation_age: 1d
# log_rotation_size: 0
# log_temp_files: 0
# log_timezone: Europe/Moscow
# log_truncate_on_rotation: true
# logging_collector: true
# maintenance_work_mem: 1GB
max_connections: 100
# max_files_per_process: 4096
# max_locks_per_transaction: 64
# max_parallel_maintenance_workers: 4
# max_parallel_workers: 8
# max_parallel_workers_per_gather: 4
# max_prepared_transactions: 0
# max_replication_slots: 10
# max_wal_senders: 10
# max_wal_size: 8GB
# max_worker_processes: 8
# min_wal_size: 2GB
# pg_stat_statements.max: 10000
# pg_stat_statements.save: false
# pg_stat_statements.track: all
# random_page_cost: 1.1
# seq_page_cost: 1
# shared_buffers: 4GB
# shared_preload_libraries: pg_stat_statements,auto_explain,pg_cron,pg_partman_bgw,redis_fdw
# superuser_reserved_connections: 5
# synchronous_commit: true
# timezone: Europe/Moscow
# track_activities: true
# track_counts: true
# track_functions: all
# track_io_timing: true
# wal_buffers: 16MB
# wal_keep_segments: 1000
# wal_level: replica
# wal_log_hints: true
# work_mem: 10485kB
# recovery_conf:
# restore_command: pgbackrest --stanza=sandbox-postgres-cluster archive-get
# %f %p
# use_pg_rewind: true
#retry_timeout: 10
#synchronous_mode: true
#synchronous_mode_strict: true
#synchronous_node_count: 1
#ttl: 100

View File

@ -1,4 +1,4 @@
---
collections:
- name: maxhoesel.proxmox
version: 5.0.1
# - name: maxhoesel.proxmox
# version: 5.0.1

View File

@ -1,4 +1,21 @@
---
- name: Log cleanup results
# handlers/main.yml
- name: Verify config application
ansible.builtin.uri:
url: "http://{{ patroni_host }}:{{ patroni_api_port }}/config"
method: GET
return_content: yes
status_code: 200
register: config_verification
delegate_to: localhost
listen: "config applied"
- name: Log config changes
ansible.builtin.debug:
msg: "Removed {{ (old_configs.files | sort(attribute='mtime'))[:-10] | length }} old config files"
msg: "Конфигурация успешно применена. Новые параметры: {{ config_verification.json | to_nice_json }}"
listen: "config applied"
- name: Log cleanup
ansible.builtin.debug:
msg: "Удалены старые конфигурационные файлы, сохранено последние 10 версий"
listen: "Log cleanup"

View File

@ -1,7 +1,8 @@
---
- name: Apply new configuration
# tasks/main.yml
- name: Apply new Patroni configuration
ansible.builtin.uri:
url: "http://{{ patroni_host }}:8008/config"
url: "http://{{ patroni_host }}:{{ patroni_api_port }}/config"
method: PATCH
body: "{{ new_config | to_json }}"
body_format: json
@ -10,103 +11,196 @@
Content-Type: "application/json"
register: apply_result
changed_when: apply_result.status == 200
notify: "config applied"
- name: Force wait for config to apply # noqa: no-handler
- name: Wait for config propagation # noqa: no-handler
ansible.builtin.wait_for:
timeout: 30
delay: 5
when: apply_result is changed
- name: Get verified cluster status # noqa: no-handler
ansible.builtin.uri:
url: "http://{{ patroni_host }}:8008/cluster"
method: GET
return_content: yes
status_code: 200
register: verified_cluster_status
delegate_to: localhost
connection: local
- name: Check for pending restarts # noqa: no-handler
when: apply_result is changed
- name: Display confirmed cluster status
ansible.builtin.debug:
msg: |
=== CONFIRMED CLUSTER STATUS ===
Leader: {{ (verified_cluster_status.json.members | selectattr('role', 'equalto', 'leader') | map(attribute='name') | first) | default('UNKNOWN') }}
Members:
{% for member in verified_cluster_status.json.members %}
- {{ member.name }} [{{ member.role | upper }}]
State: {{ member.state | default('UNKNOWN') }}
Lag: {{ member.lag | default(0) }}MB
Timeline: {{ member.timeline | default('N/A') }}
Pending restart: {{ member.pending_restart | default(false) | ternary('YES', 'NO') }}
{% endfor %}
Config Applied: {{ apply_result is changed | ternary('YES', 'NO') }}
================================
delegate_to: localhost
connection: local
run_once: true
block:
- name: Get cluster status with retry
ansible.builtin.uri:
url: "http://{{ patroni_host }}:{{ patroni_api_port }}/cluster"
method: GET
return_content: yes
status_code: 200
register: cluster_status
until: cluster_status.json is defined
retries: 3
delay: 2
delegate_to: localhost
run_once: true
- name: Refresh cluster status
ansible.builtin.uri:
url: "http://{{ patroni_host }}:8008/cluster"
method: GET
return_content: yes
status_code: 200
register: refreshed_cluster_status
delegate_to: localhost
run_once: true
when: verified_cluster_status is defined
- name: Check restart flags
ansible.builtin.set_fact:
needs_restart: >-
{{
(cluster_status.json.members |
map(attribute='pending_restart', default=false) |
select('equalto', true) | list | length > 0) or
(cluster_status.json.members |
map(attribute='tags.pending_restart', default=false) |
select('equalto', true) | list | length > 0)
}}
node_names: "{{ cluster_status.json.members | map(attribute='name') | list }}"
node_info: >-
{% set info = {} %}
{% for member in cluster_status.json.members %}
{% set _ = info.update({member.name: {'role': member.role}}) %}
{% endfor %}
{{ info }}
run_once: true
rescue:
- name: Set no restart needed
ansible.builtin.set_fact:
needs_restart: false
run_once: true
- name: Safe check for pending restarts
ansible.builtin.set_fact:
needs_restart: >-
{{
(refreshed_cluster_status.json.members |
map(attribute='pending_restart', default=false) |
select('equalto', true) | list | count > 0) or
(refreshed_cluster_status.json.members |
map(attribute='tags.pending_restart', default=false) |
select('equalto', true) | list | count > 0)
}}
node_names: >-
{{
refreshed_cluster_status.json.members |
map(attribute='name') |
list
}}
when:
- refreshed_cluster_status.json is defined
- refreshed_cluster_status.json.members is defined
run_once: true
delegate_to: localhost
- name: Show restart warning if needed
- name: Display restart warning
ansible.builtin.debug:
msg: |
{% if needs_restart %}
==================================
ВНИМАНИЕ: ТРЕБУЕТСЯ ПЕРЕЗАГРУЗКА
==================================
{% if autorestart %}
================================================
ПРЕДУПРЕЖДЕНИЕ: АВТОМАТИЧЕСКИЙ ПЕРЕЗАПУСК КЛАСТЕРА
================================================
Следующие ноды будут перезапущены:
{% for node in node_names %}
- {{ node }} ({{ node_info[node].role | default('UNKNOWN') }})
{% endfor %}
Не, я конечно могу и сам ролью, но вдруг кластер в проде или еще где!!!
Так что лучше выполнить следующую команду на одной из нод кластера:
Для отмены нажмите Ctrl+C в течение 10 секунд
{% else %}
============================================
ВНИМАНИЕ: НЕОБХОДИМ РУЧНОЙ ПЕРЕЗАПУСК КЛАСТЕРА
============================================
Выполните на одной из нод:
patronictl restart {{ node_names | join(' ') }}
patronictl restart -c /etc/patroni.yml {{ node_names | join(' ') }}
Затронутые ноды:
{% for node in node_names %}
- {{ node }}
{% endfor %}
Ноды для перезапуска:
{% for node in node_names %}
- {{ node }} ({{ node_info[node].role | default('UNKNOWN') }})
{% endfor %}
{% endif %}
{% else %}
==================================
СТАТУС: Перезагрузка не требуется
==================================
================================
ПЕРЕЗАГРУЗКА НЕ ТРЕБУЕТСЯ
================================
{% endif %}
delegate_to: localhost
run_once: true
when:
- needs_restart is defined
- node_names is defined
- node_info is defined
- name: Archive old configurations
- name: Confirm automatic restart
ansible.builtin.pause:
prompt: "Подтвердите автоматический перезапуск кластера (Enter - продолжить, Ctrl+C - отмена)"
seconds: 10
when:
- needs_restart | default(false)
- autorestart | default(false)
delegate_to: localhost
run_once: true
- name: Execute cluster restart
when:
- needs_restart | default(false)
- autorestart | bool
- cluster_status is defined
- cluster_status.json is defined
- cluster_status.json.members is defined
run_once: true
block:
- name: Find nodes needing restart
ansible.builtin.set_fact:
nodes_to_restart: >-
{%
set nodes = []
%}{%
for member in cluster_status.json.members
%}{%
if member.pending_restart is defined and member.pending_restart or
member.tags.pending_restart is defined and member.tags.pending_restart
%}{%
set _ = nodes.append(member)
%}{%
endif
%}{%
endfor
%}{{
nodes
}}
- name: Restart nodes via API
ansible.builtin.uri:
url: "http://{{ item.host }}:{{ patroni_api_port }}/restart"
method: POST
body_format: json
body:
restart_pending: true
timeout: 60
status_code: [200, 503]
loop: "{{ nodes_to_restart | default([]) }}"
loop_control:
label: "{{ item.name }}"
register: restart_results
ignore_errors: yes
changed_when: >
restart_results.status == 200 or
restart_results.status == 503
- name: Wait for cluster stabilization
block:
- name: Check cluster status until stable
ansible.builtin.uri:
url: "http://{{ patroni_host }}:{{ patroni_api_port }}/cluster"
method: GET
return_content: yes
status_code: 200
register: cluster_health
until: >
cluster_health.json.members |
selectattr('state', 'match', '^(running|streaming)$') |
list | length == cluster_health.json.members | length
retries: 12
delay: 10
delegate_to: localhost
run_once: true
- name: Show restart results
ansible.builtin.debug:
msg: |
========================
РЕЗУЛЬТАТЫ ПЕРЕЗАГРУЗКИ
========================
Нода: {{ item.item.name }}
Роль: {{ item.item.role }}
Статус: {% if item.status == 200 %}Успешно перезапущена{% elif item.status == 503 %}Перезапуск в процессе{% else %}Ошибка (код {{ item.status }}){% endif %}
Время выполнения: {{ item.elapsed }} сек
{% if item.item.pending_restart_reason is defined %}
Причина перезагрузки:
{% for param, values in item.item.pending_restart_reason.items() %}
- {{ param }}: было {{ values.old_value }}, стало {{ values.new_value }}
{% endfor %}
{% endif %}
------------------------
loop: "{{ restart_results.results | default([]) }}"
loop_control:
label: ""
run_once: true
- name: Archive old configurations # noqa: no-handler
when: apply_result is changed
run_once: true
block:
- name: Find old config files
ansible.builtin.find:
@ -117,14 +211,12 @@
delegate_to: localhost
connection: local
- name: Remove excess configs (keep last 10)
- name: Remove excess configs
ansible.builtin.file:
path: "{{ item.path }}"
state: absent
loop: "{{ (old_configs.files | sort(attribute='mtime'))[:-10] }}"
when:
- old_configs.matched > 10
- apply_result is changed
when: old_configs.matched > 10
delegate_to: localhost
connection: local
notify: Log cleanup results
notify: "Log cleanup"

View File

@ -1,5 +1,6 @@
---
- name: Подготовка к изменению конфигурационных настроек кластера
import_playbook: prepare/deploy.yaml
- name: Применение изменений настроек кластера
import_playbook: apply/deploy.yaml

View File

@ -1,10 +1,13 @@
$ANSIBLE_VAULT;1.1;AES256
37376136623761343135636239653137353661303631663536613265366431333339663866643265
3033653765613632313661393166363238643137346330620a643233623433633963333035646466
34633366623262643165326331333937623064356131306663623362663663343861383735616365
3363646132393166310a353965346531616330396666383732656430633630323438326161323965
64323865636265303331663166393232376138663965613361623361303663353737623238373435
30316161616234356264643762653036626132613664316137646665323335663232393535353131
37636331646364313839653438323461353638363936623131626161353936303839393533326162
31623833313834646233303961656633633933386135396439373463623362316561313138643631
6663
37613833393263643830623437366465373832623162373161383334336162326635663538326537
3335386563373734636232356164636530393236353466610a366432353562343063376132643331
30656666326633616639383966386439663264306536396533343861656566343539376130343930
3932346663303035350a376233326363613763383139646262313531396466616635393166616435
30643637336364656432376436373161623438316165353534643135313831636565353638363734
61663964653362363533633664626435613738613538633761393231353435646463633661643839
61616239386133353964656133316463343036666234636132316334323865653937323830313065
34646633613736663362363631363131393439623137633162383235663938633237386439623562
61646233393030663464353864656362356138643635383561653063333839353139666432323765
37396633303231396631336264393032386561666534376635383962366365333934313734323632
62346631396162383438303434383031333662386132393434353832323631653533346363333534
36616639623533633639