From d325e6019a746270e3f95121cdac1a06d9fb7795 Mon Sep 17 00:00:00 2001 From: Sergey Antropoff Date: Thu, 3 Apr 2025 17:38:19 +0300 Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8=D0=BB=20?= =?UTF-8?q?=D0=BD=D0=B0=D1=81=D1=82=D1=80=D0=BE=D0=B9=D0=BA=D1=83=20=D0=B0?= =?UTF-8?q?=D0=B2=D1=82=D0=BE=D0=BC=D0=B0=D1=82=D0=B8=D1=87=D0=B5=D1=81?= =?UTF-8?q?=D0=BA=D0=BE=D0=B3=D0=BE=20=D1=80=D0=B5=D1=81=D1=82=D0=B0=D1=80?= =?UTF-8?q?=D1=82=D0=B0=20=D0=BA=D0=BB=D0=B0=D1=81=D1=82=D0=B5=D1=80=D0=B0?= =?UTF-8?q?=20=D0=BF=D0=BE=20patroni=20api?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 5 +- patroni_config.yaml | 85 +++++++++- requirements.yml | 4 +- roles/apply/handlers/main.yaml | 21 ++- roles/apply/tasks/role/main.yaml | 256 +++++++++++++++++++++---------- roles/deploy.yaml | 1 + vars/secrets.yml | 21 +-- 7 files changed, 293 insertions(+), 100 deletions(-) diff --git a/README.md b/README.md index fddc788..9e28068 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ - Проверка состояния кластера после изменений - Уведомление о необходимости перезагрузки нод (если требуется) - Автоматическое управление историей конфигурационных файлов +- Автоматическая перезагрузка кластера (если требуется) настраивается в настройках ### Требования - Ansible 2.9+ @@ -22,7 +23,9 @@ - config_dir (по умолчанию: "/ansible/history") - директория для хранения истории конфигураций - config_file (по умолчанию: "/ansible/patroni_config.yaml") - путь к файлу с изменениями конфигурации -- patroni_host (по умолчанию: "10.14.0.180") - хост кластера Patroni +- patroni_host (по умолчанию: "127.0.0.1") - хост кластера Patroni +- patroni_api_port (по умолчанию: "8008") - порт кластера Patroni +- autorestart (по умолчанию: "false") - включение автоматического рестарта кластера, если при изменении настроек они требуют перезагрузки ### Как внести изменения в конфиг кластера? 1. Для начала создайте новый branch по имени кластера. diff --git a/patroni_config.yaml b/patroni_config.yaml index 1f3d7e4..1f7c447 100644 --- a/patroni_config.yaml +++ b/patroni_config.yaml @@ -1,5 +1,82 @@ +#loop_wait: 10 +#master_start_timeout: 300 +#maximum_lag_on_failover: 1048576 postgresql: - parameters: - max_connections: 300 - shared_buffers: "12GB" - use_pg_rewind: true + parameters: +# archive_command: pgbackrest --stanza=sandbox-postgres-cluster archive-push +# %p || cd . +# archive_mode: true +# archive_timeout: 1800s +# auto_explain.log_analyze: true +# auto_explain.log_buffers: true +# auto_explain.log_min_duration: 10s +# auto_explain.log_nested_statements: true +# auto_explain.log_timing: false +# auto_explain.log_triggers: true +# auto_explain.log_verbose: true +# autovacuum: true +# autovacuum_analyze_scale_factor: 0.02 +# autovacuum_max_workers: 8 +# autovacuum_naptime: 1s +# autovacuum_vacuum_cost_delay: 20 +# autovacuum_vacuum_cost_limit: 200 +# autovacuum_vacuum_scale_factor: 0.01 +# checkpoint_completion_target: 0.9 +# checkpoint_timeout: 15min +# default_statistics_target: 100 +# effective_cache_size: 12GB +# effective_io_concurrency: 200 +# hot_standby: true +# huge_pages: try +# log_checkpoints: true +# log_directory: /var/log/postgresql +# log_filename: postgresql-%a.log +# log_line_prefix: '%t [%p-%l] %r %q%u@%d ' +# log_lock_waits: true +# log_rotation_age: 1d +# log_rotation_size: 0 +# log_temp_files: 0 +# log_timezone: Europe/Moscow +# log_truncate_on_rotation: true +# logging_collector: true +# maintenance_work_mem: 1GB + max_connections: 100 +# max_files_per_process: 4096 +# max_locks_per_transaction: 64 +# max_parallel_maintenance_workers: 4 +# max_parallel_workers: 8 +# max_parallel_workers_per_gather: 4 +# max_prepared_transactions: 0 +# max_replication_slots: 10 +# max_wal_senders: 10 +# max_wal_size: 8GB +# max_worker_processes: 8 +# min_wal_size: 2GB +# pg_stat_statements.max: 10000 +# pg_stat_statements.save: false +# pg_stat_statements.track: all +# random_page_cost: 1.1 +# seq_page_cost: 1 +# shared_buffers: 4GB +# shared_preload_libraries: pg_stat_statements,auto_explain,pg_cron,pg_partman_bgw,redis_fdw +# superuser_reserved_connections: 5 +# synchronous_commit: true +# timezone: Europe/Moscow +# track_activities: true +# track_counts: true +# track_functions: all +# track_io_timing: true +# wal_buffers: 16MB +# wal_keep_segments: 1000 +# wal_level: replica +# wal_log_hints: true +# work_mem: 10485kB +# recovery_conf: +# restore_command: pgbackrest --stanza=sandbox-postgres-cluster archive-get +# %f %p +# use_pg_rewind: true +#retry_timeout: 10 +#synchronous_mode: true +#synchronous_mode_strict: true +#synchronous_node_count: 1 +#ttl: 100 diff --git a/requirements.yml b/requirements.yml index cf12a33..010db52 100644 --- a/requirements.yml +++ b/requirements.yml @@ -1,4 +1,4 @@ --- collections: - - name: maxhoesel.proxmox - version: 5.0.1 +# - name: maxhoesel.proxmox +# version: 5.0.1 diff --git a/roles/apply/handlers/main.yaml b/roles/apply/handlers/main.yaml index 127cc2f..3918d14 100644 --- a/roles/apply/handlers/main.yaml +++ b/roles/apply/handlers/main.yaml @@ -1,4 +1,21 @@ --- -- name: Log cleanup results +# handlers/main.yml +- name: Verify config application + ansible.builtin.uri: + url: "http://{{ patroni_host }}:{{ patroni_api_port }}/config" + method: GET + return_content: yes + status_code: 200 + register: config_verification + delegate_to: localhost + listen: "config applied" + +- name: Log config changes ansible.builtin.debug: - msg: "Removed {{ (old_configs.files | sort(attribute='mtime'))[:-10] | length }} old config files" + msg: "Конфигурация успешно применена. Новые параметры: {{ config_verification.json | to_nice_json }}" + listen: "config applied" + +- name: Log cleanup + ansible.builtin.debug: + msg: "Удалены старые конфигурационные файлы, сохранено последние 10 версий" + listen: "Log cleanup" diff --git a/roles/apply/tasks/role/main.yaml b/roles/apply/tasks/role/main.yaml index 8bbcb96..e9638bc 100644 --- a/roles/apply/tasks/role/main.yaml +++ b/roles/apply/tasks/role/main.yaml @@ -1,7 +1,8 @@ --- -- name: Apply new configuration +# tasks/main.yml +- name: Apply new Patroni configuration ansible.builtin.uri: - url: "http://{{ patroni_host }}:8008/config" + url: "http://{{ patroni_host }}:{{ patroni_api_port }}/config" method: PATCH body: "{{ new_config | to_json }}" body_format: json @@ -10,103 +11,196 @@ Content-Type: "application/json" register: apply_result changed_when: apply_result.status == 200 + notify: "config applied" -- name: Force wait for config to apply # noqa: no-handler +- name: Wait for config propagation # noqa: no-handler ansible.builtin.wait_for: timeout: 30 delay: 5 when: apply_result is changed -- name: Get verified cluster status # noqa: no-handler - ansible.builtin.uri: - url: "http://{{ patroni_host }}:8008/cluster" - method: GET - return_content: yes - status_code: 200 - register: verified_cluster_status - delegate_to: localhost - connection: local +- name: Check for pending restarts # noqa: no-handler when: apply_result is changed - -- name: Display confirmed cluster status - ansible.builtin.debug: - msg: | - === CONFIRMED CLUSTER STATUS === - Leader: {{ (verified_cluster_status.json.members | selectattr('role', 'equalto', 'leader') | map(attribute='name') | first) | default('UNKNOWN') }} - Members: - {% for member in verified_cluster_status.json.members %} - - {{ member.name }} [{{ member.role | upper }}] - State: {{ member.state | default('UNKNOWN') }} - Lag: {{ member.lag | default(0) }}MB - Timeline: {{ member.timeline | default('N/A') }} - Pending restart: {{ member.pending_restart | default(false) | ternary('YES', 'NO') }} - {% endfor %} - Config Applied: {{ apply_result is changed | ternary('YES', 'NO') }} - ================================ - delegate_to: localhost - connection: local run_once: true + block: + - name: Get cluster status with retry + ansible.builtin.uri: + url: "http://{{ patroni_host }}:{{ patroni_api_port }}/cluster" + method: GET + return_content: yes + status_code: 200 + register: cluster_status + until: cluster_status.json is defined + retries: 3 + delay: 2 + delegate_to: localhost + run_once: true -- name: Refresh cluster status - ansible.builtin.uri: - url: "http://{{ patroni_host }}:8008/cluster" - method: GET - return_content: yes - status_code: 200 - register: refreshed_cluster_status - delegate_to: localhost - run_once: true - when: verified_cluster_status is defined + - name: Check restart flags + ansible.builtin.set_fact: + needs_restart: >- + {{ + (cluster_status.json.members | + map(attribute='pending_restart', default=false) | + select('equalto', true) | list | length > 0) or + (cluster_status.json.members | + map(attribute='tags.pending_restart', default=false) | + select('equalto', true) | list | length > 0) + }} + node_names: "{{ cluster_status.json.members | map(attribute='name') | list }}" + node_info: >- + {% set info = {} %} + {% for member in cluster_status.json.members %} + {% set _ = info.update({member.name: {'role': member.role}}) %} + {% endfor %} + {{ info }} + run_once: true + rescue: + - name: Set no restart needed + ansible.builtin.set_fact: + needs_restart: false + run_once: true -- name: Safe check for pending restarts - ansible.builtin.set_fact: - needs_restart: >- - {{ - (refreshed_cluster_status.json.members | - map(attribute='pending_restart', default=false) | - select('equalto', true) | list | count > 0) or - (refreshed_cluster_status.json.members | - map(attribute='tags.pending_restart', default=false) | - select('equalto', true) | list | count > 0) - }} - node_names: >- - {{ - refreshed_cluster_status.json.members | - map(attribute='name') | - list - }} - when: - - refreshed_cluster_status.json is defined - - refreshed_cluster_status.json.members is defined - run_once: true - delegate_to: localhost -- name: Show restart warning if needed +- name: Display restart warning ansible.builtin.debug: msg: | {% if needs_restart %} - ================================== - ВНИМАНИЕ: ТРЕБУЕТСЯ ПЕРЕЗАГРУЗКА - ================================== + {% if autorestart %} + ================================================ + ПРЕДУПРЕЖДЕНИЕ: АВТОМАТИЧЕСКИЙ ПЕРЕЗАПУСК КЛАСТЕРА + ================================================ + Следующие ноды будут перезапущены: + {% for node in node_names %} + - {{ node }} ({{ node_info[node].role | default('UNKNOWN') }}) + {% endfor %} - Не, я конечно могу и сам ролью, но вдруг кластер в проде или еще где!!! - Так что лучше выполнить следующую команду на одной из нод кластера: + Для отмены нажмите Ctrl+C в течение 10 секунд + {% else %} + ============================================ + ВНИМАНИЕ: НЕОБХОДИМ РУЧНОЙ ПЕРЕЗАПУСК КЛАСТЕРА + ============================================ + Выполните на одной из нод: - patronictl restart {{ node_names | join(' ') }} + patronictl restart -c /etc/patrony.yml {{ node_names | join(' ') }} - Затронутые ноды: - {% for node in node_names %} - - {{ node }} - {% endfor %} + Ноды для перезапуска: + {% for node in node_names %} + - {{ node }} ({{ node_info[node].role | default('UNKNOWN') }}) + {% endfor %} + {% endif %} {% else %} - ================================== - СТАТУС: Перезагрузка не требуется - ================================== + ================================ + ПЕРЕЗАГРУЗКА НЕ ТРЕБУЕТСЯ + ================================ {% endif %} delegate_to: localhost run_once: true + when: + - needs_restart is defined + - node_names is defined + - node_info is defined -- name: Archive old configurations +- name: Confirm automatic restart + ansible.builtin.pause: + prompt: "Подтвердите автоматический перезапуск кластера (Enter - продолжить, Ctrl+C - отмена)" + seconds: 10 + when: + - needs_restart | default(false) + - autorestart | default(false) + delegate_to: localhost + run_once: true + +- name: Execute cluster restart + when: + - needs_restart | default(false) + - autorestart | bool + - cluster_status is defined + - cluster_status.json is defined + - cluster_status.json.members is defined + run_once: true + block: + - name: Find nodes needing restart + ansible.builtin.set_fact: + nodes_to_restart: >- + {% + set nodes = [] + %}{% + for member in cluster_status.json.members + %}{% + if member.pending_restart is defined and member.pending_restart or + member.tags.pending_restart is defined and member.tags.pending_restart + %}{% + set _ = nodes.append(member) + %}{% + endif + %}{% + endfor + %}{{ + nodes + }} + + - name: Restart nodes via API + ansible.builtin.uri: + url: "http://{{ item.host }}:{{ patroni_api_port }}/restart" + method: POST + body_format: json + body: + restart_pending: true + timeout: 60 + status_code: [200, 503] + loop: "{{ nodes_to_restart | default([]) }}" + loop_control: + label: "{{ item.name }}" + register: restart_results + ignore_errors: yes + changed_when: > + restart_results.status == 200 or + restart_results.status == 503 + + - name: Wait for cluster stabilization + block: + - name: Check cluster status until stable + ansible.builtin.uri: + url: "http://{{ patroni_host }}:{{ patroni_api_port }}/cluster" + method: GET + return_content: yes + status_code: 200 + register: cluster_health + until: > + cluster_health.json.members | + selectattr('state', 'match', '^(running|streaming)$') | + list | length == cluster_health.json.members | length + retries: 12 + delay: 10 + delegate_to: localhost + run_once: true + + - name: Show restart results + ansible.builtin.debug: + msg: | + ======================== + РЕЗУЛЬТАТЫ ПЕРЕЗАГРУЗКИ + ======================== + Нода: {{ item.item.name }} + Роль: {{ item.item.role }} + Статус: {% if item.status == 200 %}Успешно перезапущена{% elif item.status == 503 %}Перезапуск в процессе{% else %}Ошибка (код {{ item.status }}){% endif %} + Время выполнения: {{ item.elapsed }} сек + {% if item.item.pending_restart_reason is defined %} + Причина перезагрузки: + {% for param, values in item.item.pending_restart_reason.items() %} + - {{ param }}: было {{ values.old_value }}, стало {{ values.new_value }} + {% endfor %} + {% endif %} + ------------------------ + loop: "{{ restart_results.results | default([]) }}" + loop_control: + label: "" + run_once: true + +- name: Archive old configurations # noqa: no-handler + when: apply_result is changed + run_once: true block: - name: Find old config files ansible.builtin.find: @@ -117,14 +211,12 @@ delegate_to: localhost connection: local - - name: Remove excess configs (keep last 10) + - name: Remove excess configs ansible.builtin.file: path: "{{ item.path }}" state: absent loop: "{{ (old_configs.files | sort(attribute='mtime'))[:-10] }}" - when: - - old_configs.matched > 10 - - apply_result is changed + when: old_configs.matched > 10 delegate_to: localhost connection: local - notify: Log cleanup results + notify: "Log cleanup" diff --git a/roles/deploy.yaml b/roles/deploy.yaml index a1f20d5..dedd962 100644 --- a/roles/deploy.yaml +++ b/roles/deploy.yaml @@ -1,5 +1,6 @@ --- - name: Подготовка ро�к изменению конфнастроек кла�стера import_playbook: prepare/deploy.yaml + - name: Применение изменений нас�троек кластера import_playbook: apply/deploy.yaml diff --git a/vars/secrets.yml b/vars/secrets.yml index 8bbfaa7..4b7f934 100644 --- a/vars/secrets.yml +++ b/vars/secrets.yml @@ -1,10 +1,13 @@ $ANSIBLE_VAULT;1.1;AES256 -37376136623761343135636239653137353661303631663536613265366431333339663866643265 -3033653765613632313661393166363238643137346330620a643233623433633963333035646466 -34633366623262643165326331333937623064356131306663623362663663343861383735616365 -3363646132393166310a353965346531616330396666383732656430633630323438326161323965 -64323865636265303331663166393232376138663965613361623361303663353737623238373435 -30316161616234356264643762653036626132613664316137646665323335663232393535353131 -37636331646364313839653438323461353638363936623131626161353936303839393533326162 -31623833313834646233303961656633633933386135396439373463623362316561313138643631 -6663 +37613833393263643830623437366465373832623162373161383334336162326635663538326537 +3335386563373734636232356164636530393236353466610a366432353562343063376132643331 +30656666326633616639383966386439663264306536396533343861656566343539376130343930 +3932346663303035350a376233326363613763383139646262313531396466616635393166616435 +30643637336364656432376436373161623438316165353534643135313831636565353638363734 +61663964653362363533633664626435613738613538633761393231353435646463633661643839 +61616239386133353964656133316463343036666234636132316334323865653937323830313065 +34646633613736663362363631363131393439623137633162383235663938633237386439623562 +61646233393030663464353864656362356138643635383561653063333839353139666432323765 +37396633303231396631336264393032386561666534376635383962366365333934313734323632 +62346631396162383438303434383031333662386132393434353832323631653533346363333534 +36616639623533633639