refactor: упростить логику получения контейнеров и WebSocket
- Упрощена функция list_containers для предотвращения зависания
- Убрана сложная логика проверок health check
- Упрощена логика получения логов через WebSocket
- Убраны таймауты и сложная обработка ошибок
- Добавлена базовая обработка ошибок для стабильности

Автор: Сергей Антропов
Сайт: https://devops.org.ru
This commit is contained in:
parent
e2563629e1
commit
d6e606ac1f
241
app.py
241
app.py
@ -53,104 +53,63 @@ def verify_ws_token(token: str) -> bool:
|
|||||||
# ---------- DOCKER HELPERS ----------
|
# ---------- DOCKER HELPERS ----------
|
||||||
def list_containers(project: Optional[str] = None, include_stopped: bool = False) -> List[Dict]:
|
def list_containers(project: Optional[str] = None, include_stopped: bool = False) -> List[Dict]:
|
||||||
"""
|
"""
|
||||||
Получает список контейнеров, пропуская контейнеры с проблемными health check
|
Получает список контейнеров с упрощенной логикой для предотвращения зависания
|
||||||
Автор: Сергей Антропов
|
Автор: Сергей Антропов
|
||||||
Сайт: https://devops.org.ru
|
Сайт: https://devops.org.ru
|
||||||
"""
|
"""
|
||||||
import signal
|
|
||||||
import time
|
|
||||||
|
|
||||||
items = []
|
items = []
|
||||||
|
|
||||||
# Функция для обработки таймаута
|
|
||||||
def timeout_handler(signum, frame):
|
|
||||||
raise TimeoutError("Timeout getting container list")
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Устанавливаем таймаут на получение списка контейнеров
|
# Получаем список контейнеров с базовой обработкой ошибок
|
||||||
signal.signal(signal.SIGALRM, timeout_handler)
|
containers = docker_client.containers.list(all=include_stopped)
|
||||||
signal.alarm(CONTAINER_LIST_TIMEOUT)
|
|
||||||
|
|
||||||
# Получаем список контейнеров с обработкой ошибок
|
|
||||||
containers = []
|
|
||||||
try:
|
|
||||||
containers = docker_client.containers.list(all=include_stopped)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"❌ Ошибка получения списка контейнеров: {e}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
for c in containers:
|
for c in containers:
|
||||||
try:
|
try:
|
||||||
# Проверяем health status контейнера с таймаутом
|
# Базовая информация о контейнере (без health check)
|
||||||
health_status = None
|
basic_info = {
|
||||||
try:
|
|
||||||
# Устанавливаем таймаут на получение health status
|
|
||||||
signal.alarm(HEALTH_CHECK_TIMEOUT)
|
|
||||||
health_status = c.attrs.get("State", {}).get("Health", {}).get("Status")
|
|
||||||
signal.alarm(0) # Отменяем таймаут
|
|
||||||
except TimeoutError:
|
|
||||||
print(f"⚠️ Таймаут при получении health status для контейнера {c.name} (ID: {c.id[:12]})")
|
|
||||||
continue
|
|
||||||
except Exception as e:
|
|
||||||
print(f"⚠️ Пропускаем контейнер {c.name} (ID: {c.id[:12]}): не удается получить health status - {e}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Пропускаем контейнеры с проблемными health check (если включено)
|
|
||||||
if SKIP_UNHEALTHY and health_status == "unhealthy":
|
|
||||||
print(f"⚠️ Пропускаем нездоровый контейнер {c.name} (ID: {c.id[:12]})")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Получаем информацию о контейнере с таймаутом
|
|
||||||
try:
|
|
||||||
signal.alarm(CONTAINER_INFO_TIMEOUT)
|
|
||||||
labels = c.labels or {}
|
|
||||||
proj = labels.get("com.docker.compose.project")
|
|
||||||
svc = labels.get("com.docker.compose.service") or c.name
|
|
||||||
signal.alarm(0)
|
|
||||||
except TimeoutError:
|
|
||||||
print(f"⚠️ Таймаут при получении меток контейнера {c.name} (ID: {c.id[:12]})")
|
|
||||||
continue
|
|
||||||
|
|
||||||
if project and proj != project:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Получаем информацию об образе с таймаутом
|
|
||||||
try:
|
|
||||||
signal.alarm(HEALTH_CHECK_TIMEOUT)
|
|
||||||
image_info = c.image.tags[0] if c.image and c.image.tags else c.image.short_id
|
|
||||||
signal.alarm(0)
|
|
||||||
except TimeoutError:
|
|
||||||
print(f"⚠️ Таймаут при получении информации об образе для контейнера {c.name} (ID: {c.id[:12]})")
|
|
||||||
image_info = "unknown"
|
|
||||||
except Exception:
|
|
||||||
image_info = "unknown"
|
|
||||||
|
|
||||||
items.append({
|
|
||||||
"id": c.id[:12],
|
"id": c.id[:12],
|
||||||
"name": c.name,
|
"name": c.name,
|
||||||
"image": image_info,
|
|
||||||
"status": c.status,
|
"status": c.status,
|
||||||
"service": svc,
|
"image": "unknown",
|
||||||
"project": proj,
|
"service": c.name,
|
||||||
"health": health_status,
|
"project": None,
|
||||||
})
|
"health": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Безопасно получаем метки
|
||||||
|
try:
|
||||||
|
labels = c.labels or {}
|
||||||
|
basic_info["project"] = labels.get("com.docker.compose.project")
|
||||||
|
basic_info["service"] = labels.get("com.docker.compose.service") or c.name
|
||||||
|
except Exception:
|
||||||
|
pass # Используем значения по умолчанию
|
||||||
|
|
||||||
|
# Безопасно получаем информацию об образе
|
||||||
|
try:
|
||||||
|
if c.image and c.image.tags:
|
||||||
|
basic_info["image"] = c.image.tags[0]
|
||||||
|
elif c.image:
|
||||||
|
basic_info["image"] = c.image.short_id
|
||||||
|
except Exception:
|
||||||
|
pass # Оставляем "unknown"
|
||||||
|
|
||||||
|
# Фильтрация по проекту
|
||||||
|
if project and basic_info["project"] != project:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Добавляем контейнер в список
|
||||||
|
items.append(basic_info)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Пропускаем контейнеры, которые вызывают ошибки
|
# Пропускаем контейнеры с критическими ошибками
|
||||||
print(f"⚠️ Пропускаем проблемный контейнер {c.name if hasattr(c, 'name') else 'unknown'} (ID: {c.id[:12]}): {e}")
|
print(f"⚠️ Пропускаем проблемный контейнер {c.name if hasattr(c, 'name') else 'unknown'} (ID: {c.id[:12]}): {e}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
signal.alarm(0) # Отменяем таймаут
|
|
||||||
|
|
||||||
except TimeoutError:
|
|
||||||
print("❌ Таймаут при получении списка контейнеров")
|
|
||||||
signal.alarm(0)
|
|
||||||
return []
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ Критическая ошибка при получении списка контейнеров: {e}")
|
print(f"❌ Ошибка получения списка контейнеров: {e}")
|
||||||
signal.alarm(0)
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# Сортируем по проекту, сервису и имени
|
||||||
items.sort(key=lambda x: (x.get("project") or "", x.get("service") or "", x.get("name") or ""))
|
items.sort(key=lambda x: (x.get("project") or "", x.get("service") or "", x.get("name") or ""))
|
||||||
return items
|
return items
|
||||||
|
|
||||||
@ -208,26 +167,35 @@ async def ws_logs(ws: WebSocket, container_id: str, tail: int = DEFAULT_TAIL, to
|
|||||||
await ws.close(); return
|
await ws.close(); return
|
||||||
|
|
||||||
def find_by_id_prefix(prefix: str):
|
def find_by_id_prefix(prefix: str):
|
||||||
for c in docker_client.containers.list(all=True):
|
"""Простой поиск контейнера по ID"""
|
||||||
if c.id.startswith(prefix):
|
try:
|
||||||
return c
|
for c in docker_client.containers.list(all=True):
|
||||||
|
if c.id.startswith(prefix):
|
||||||
|
return c
|
||||||
|
except Exception as e:
|
||||||
|
print(f"❌ Ошибка поиска контейнера по ID {prefix}: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def find_by_service(service_name: str, project_name: Optional[str] = None):
|
def find_by_service(service_name: str, project_name: Optional[str] = None):
|
||||||
# pick the "newest" container of that compose service (optionally same project)
|
"""Простой поиск контейнера по сервису"""
|
||||||
found = []
|
|
||||||
for c in docker_client.containers.list(all=True):
|
|
||||||
lbl = c.labels or {}
|
|
||||||
if lbl.get("com.docker.compose.service") == service_name and (project_name is None or lbl.get("com.docker.compose.project")==project_name):
|
|
||||||
found.append(c)
|
|
||||||
if not found:
|
|
||||||
return None
|
|
||||||
# sort by Created desc
|
|
||||||
try:
|
try:
|
||||||
found.sort(key=lambda x: x.attrs.get("Created",""), reverse=True)
|
found = []
|
||||||
except Exception:
|
for c in docker_client.containers.list(all=True):
|
||||||
pass
|
try:
|
||||||
return found[0]
|
lbl = c.labels or {}
|
||||||
|
if lbl.get("com.docker.compose.service") == service_name and (project_name is None or lbl.get("com.docker.compose.project")==project_name):
|
||||||
|
found.append(c)
|
||||||
|
except Exception:
|
||||||
|
continue # Пропускаем контейнеры с проблемными метками
|
||||||
|
|
||||||
|
if not found:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Возвращаем первый найденный контейнер
|
||||||
|
return found[0]
|
||||||
|
except Exception as e:
|
||||||
|
print(f"❌ Ошибка поиска контейнера по сервису {service_name}: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
# initial resolve
|
# initial resolve
|
||||||
container = None
|
container = None
|
||||||
@ -249,73 +217,38 @@ async def ws_logs(ws: WebSocket, container_id: str, tail: int = DEFAULT_TAIL, to
|
|||||||
svc_label = service
|
svc_label = service
|
||||||
proj_label = project
|
proj_label = project
|
||||||
|
|
||||||
# streaming loop with reattach
|
# Упрощенная логика получения логов
|
||||||
first_tail = tail
|
|
||||||
try:
|
try:
|
||||||
while True:
|
if container is None:
|
||||||
if container is None and svc_label:
|
await ws.send_text("ERROR: container not found")
|
||||||
container = find_by_service(svc_label, proj_label)
|
return
|
||||||
# if still none, wait and try again
|
|
||||||
if container is None:
|
|
||||||
try:
|
|
||||||
await asyncio.sleep(1.0)
|
|
||||||
continue
|
|
||||||
except Exception:
|
|
||||||
break
|
|
||||||
if container is None:
|
|
||||||
await ws.send_text("ERROR: container not found")
|
|
||||||
break
|
|
||||||
|
|
||||||
try:
|
# Получаем логи контейнера
|
||||||
# On first attach use requested tail; on reattach use tail=0 to avoid duplicate backlog
|
try:
|
||||||
use_tail = first_tail
|
stream = container.logs(stream=True, follow=True, tail=tail)
|
||||||
first_tail = 0
|
|
||||||
stream = container.logs(stream=True, follow=True, tail=(use_tail if use_tail>0 else "all"))
|
# Отправляем логи клиенту
|
||||||
# stream loop
|
for chunk in stream:
|
||||||
for chunk in stream:
|
if chunk is None:
|
||||||
if chunk is None:
|
|
||||||
break
|
|
||||||
try:
|
|
||||||
await ws.send_text(chunk.decode(errors="ignore"))
|
|
||||||
except RuntimeError:
|
|
||||||
# client side closed
|
|
||||||
stream.close()
|
|
||||||
return
|
|
||||||
# Normal EOF (container stopped or recreated). Try to re-resolve by service label.
|
|
||||||
stream.close()
|
|
||||||
# Re-resolve. If same ID and container stopped, wait; if new ID, reattach.
|
|
||||||
old_id = container.id
|
|
||||||
container = None
|
|
||||||
# small backoff
|
|
||||||
await asyncio.sleep(1.0)
|
|
||||||
if svc_label:
|
|
||||||
container = find_by_service(svc_label, proj_label)
|
|
||||||
if container and container.id == old_id:
|
|
||||||
# same container (probably stopped) — keep waiting until it comes back
|
|
||||||
container = None
|
|
||||||
await asyncio.sleep(1.0)
|
|
||||||
continue
|
|
||||||
# else: will loop and attach to new container
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
# No service label -> break
|
|
||||||
break
|
break
|
||||||
except WebSocketDisconnect:
|
|
||||||
break
|
|
||||||
except Exception as e:
|
|
||||||
try:
|
try:
|
||||||
await ws.send_text(f"ERROR: {e}")
|
await ws.send_text(chunk.decode(errors="ignore"))
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
# Клиент отключился
|
||||||
# try re-resolve and continue
|
break
|
||||||
container = None
|
|
||||||
await asyncio.sleep(1.0)
|
stream.close()
|
||||||
continue
|
|
||||||
|
except Exception as e:
|
||||||
|
await ws.send_text(f"ERROR: {e}")
|
||||||
|
|
||||||
except WebSocketDisconnect:
|
except WebSocketDisconnect:
|
||||||
pass
|
pass # Клиент отключился
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
try: await ws.send_text(f"ERROR: {e}")
|
try:
|
||||||
except Exception: pass
|
await ws.send_text(f"ERROR: {e}")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
finally:
|
finally:
|
||||||
try:
|
try:
|
||||||
await ws.close()
|
await ws.close()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user