---
# Chaos engineering playbook for fault-tolerance testing.
# Author: Sergey Antropov
# Site: https://devops.org.ru
#
# Three independent plays:
#   1. Chaos Network  - adds latency and packet loss on the control node.
#   2. Chaos Services - stops a list of systemd services on a subset of hosts.
#   3. Chaos Docker   - stops containers on a subset of the 'dind' hosts.
# Every play restores what it changed in an `always:` section, so a failed
# or interrupted task does not leave the fault injected.

- name: Chaos Network (add latency)
  hosts: localhost
  gather_facts: false
  # tc and the package manager both need root privileges.
  become: true
  vars:
    # Callers still pass chaos_duration / chaos_latency / chaos_loss (for
    # example with --extra-vars). The play-local copies use different names
    # because a variable whose template refers to itself is a recursive
    # loop in Ansible and fails as soon as it is evaluated.
    netem_interface: "{{ chaos_interface | default('eth0') }}"
    netem_duration: "{{ chaos_duration | default(60) }}"
    netem_delay: "{{ chaos_latency | default('100ms') }}"
    netem_loss: "{{ chaos_loss | default('5%') }}"
  tasks:
    - name: Install chaos tools
      ansible.builtin.package:
        # tc is shipped inside iproute2; it is not a package of its own.
        name:
          - iproute2
          - iptables
        state: present

    - name: Inject network chaos and always clean up
      block:
        - name: Add network latency and packet loss
          # One netem qdisc carries both delay and loss. Two separate
          # `qdisc add ... root` calls cannot coexist: the second one fails
          # because the root qdisc already exists. `replace` also makes
          # the task safe to re-run after an aborted previous run.
          ansible.builtin.command:
            cmd: >-
              tc qdisc replace dev {{ netem_interface }} root netem
              delay {{ netem_delay }} loss {{ netem_loss }}
          changed_when: true

        - name: Wait for chaos duration
          ansible.builtin.pause:
            seconds: "{{ netem_duration | int }}"
      always:
        - name: Remove network chaos
          ansible.builtin.command:
            cmd: tc qdisc del dev {{ netem_interface }} root
          changed_when: true
          # Best effort: there is nothing to delete when the add step failed.
          ignore_errors: true

- name: Chaos Services (random failures)
  hosts: all
  become: true
  vars:
    chaos_services:
      - postgresql
      - redis
      - nginx
      - docker
  tasks:
    - name: Decide whether this host is hit in this run
      # The epoch fact is a string, so it must be cast before arithmetic.
      # Computing the flag once keeps the decision stable for the whole play.
      ansible.builtin.set_fact:
        chaos_services_selected: >-
          {{ (ansible_play_hosts.index(inventory_hostname)
              + (ansible_facts['date_time']['epoch'] | int)) % 3 == 0 }}

    - name: Stop services, wait, and always restore them
      block:
        - name: Random service stop
          ansible.builtin.systemd:
            name: "{{ item }}"
            state: stopped
          loop: "{{ chaos_services }}"
          register: chaos_service_stop
          when: chaos_services_selected | bool

        # Deliberately unconditional: pause runs once for the whole batch,
        # so a per-host condition could skip the wait for every host.
        - name: Wait for chaos
          ansible.builtin.pause:
            seconds: 30
      always:
        - name: Restart services
          ansible.builtin.systemd:
            name: "{{ item }}"
            state: started
          # Only units this run actually stopped are started again, so
          # services that were already down before the test stay down.
          loop: >-
            {{ chaos_service_stop.results | default([])
               | selectattr('changed', 'defined')
               | selectattr('changed')
               | map(attribute='item') | list }}

- name: Chaos Docker (container failures)
  hosts: "{{ groups['dind'] | default([]) }}"
  gather_facts: false
  vars:
    docker_host: "tcp://{{ inventory_hostname }}:2375"
  tasks:
    - name: Decide whether this host is hit in this run
      # Facts are not gathered in this play, so the timestamp comes from
      # the controller clock via now() instead of the date_time fact.
      ansible.builtin.set_fact:
        chaos_containers_selected: >-
          {{ (ansible_play_hosts.index(inventory_hostname)
              + (now().timestamp() | int)) % 4 == 0 }}

    - name: Stop containers, wait, and always restore them
      block:
        # NOTE(review): container names are taken from the play host list,
        # as in the original; confirm that containers are really named
        # after the inventory hosts on every daemon.
        - name: Random container stop
          community.docker.docker_container:
            name: "{{ item }}"
            state: stopped
            docker_host: "{{ docker_host }}"
          loop: "{{ ansible_play_hosts }}"
          register: chaos_container_stop
          when: chaos_containers_selected | bool

        # Deliberately unconditional, see the note in the services play.
        - name: Wait for chaos
          ansible.builtin.pause:
            seconds: 20
      always:
        - name: Restart containers
          community.docker.docker_container:
            name: "{{ item }}"
            state: started
            docker_host: "{{ docker_host }}"
          # Only containers this run actually stopped are started again.
          loop: >-
            {{ chaos_container_stop.results | default([])
               | selectattr('changed', 'defined')
               | selectattr('changed')
               | map(attribute='item') | list }}