refactor: упростить логику получения контейнеров и WebSocket
- Упрощена функция list_containers для предотвращения зависания
- Убрана сложная логика проверок health check
- Упрощена WebSocket-логика получения логов
- Убраны таймауты и сложная обработка ошибок
- Добавлена базовая обработка ошибок для стабильности

Автор: Сергей Антропов
Сайт: https://devops.org.ru
This commit is contained in:
parent
e2563629e1
commit
d6e606ac1f
241
app.py
241
app.py
@ -53,104 +53,63 @@ def verify_ws_token(token: str) -> bool:
|
||||
# ---------- DOCKER HELPERS ----------
|
||||
def list_containers(project: Optional[str] = None, include_stopped: bool = False) -> List[Dict]:
|
||||
"""
|
||||
Получает список контейнеров, пропуская контейнеры с проблемными health check
|
||||
Получает список контейнеров с упрощенной логикой для предотвращения зависания
|
||||
Автор: Сергей Антропов
|
||||
Сайт: https://devops.org.ru
|
||||
"""
|
||||
import signal
|
||||
import time
|
||||
|
||||
items = []
|
||||
|
||||
# Функция для обработки таймаута
|
||||
def timeout_handler(signum, frame):
|
||||
raise TimeoutError("Timeout getting container list")
|
||||
|
||||
try:
|
||||
# Устанавливаем таймаут на получение списка контейнеров
|
||||
signal.signal(signal.SIGALRM, timeout_handler)
|
||||
signal.alarm(CONTAINER_LIST_TIMEOUT)
|
||||
|
||||
# Получаем список контейнеров с обработкой ошибок
|
||||
containers = []
|
||||
try:
|
||||
containers = docker_client.containers.list(all=include_stopped)
|
||||
except Exception as e:
|
||||
print(f"❌ Ошибка получения списка контейнеров: {e}")
|
||||
return []
|
||||
# Получаем список контейнеров с базовой обработкой ошибок
|
||||
containers = docker_client.containers.list(all=include_stopped)
|
||||
|
||||
for c in containers:
|
||||
try:
|
||||
# Проверяем health status контейнера с таймаутом
|
||||
health_status = None
|
||||
try:
|
||||
# Устанавливаем таймаут на получение health status
|
||||
signal.alarm(HEALTH_CHECK_TIMEOUT)
|
||||
health_status = c.attrs.get("State", {}).get("Health", {}).get("Status")
|
||||
signal.alarm(0) # Отменяем таймаут
|
||||
except TimeoutError:
|
||||
print(f"⚠️ Таймаут при получении health status для контейнера {c.name} (ID: {c.id[:12]})")
|
||||
continue
|
||||
except Exception as e:
|
||||
print(f"⚠️ Пропускаем контейнер {c.name} (ID: {c.id[:12]}): не удается получить health status - {e}")
|
||||
continue
|
||||
|
||||
# Пропускаем контейнеры с проблемными health check (если включено)
|
||||
if SKIP_UNHEALTHY and health_status == "unhealthy":
|
||||
print(f"⚠️ Пропускаем нездоровый контейнер {c.name} (ID: {c.id[:12]})")
|
||||
continue
|
||||
|
||||
# Получаем информацию о контейнере с таймаутом
|
||||
try:
|
||||
signal.alarm(CONTAINER_INFO_TIMEOUT)
|
||||
labels = c.labels or {}
|
||||
proj = labels.get("com.docker.compose.project")
|
||||
svc = labels.get("com.docker.compose.service") or c.name
|
||||
signal.alarm(0)
|
||||
except TimeoutError:
|
||||
print(f"⚠️ Таймаут при получении меток контейнера {c.name} (ID: {c.id[:12]})")
|
||||
continue
|
||||
|
||||
if project and proj != project:
|
||||
continue
|
||||
|
||||
# Получаем информацию об образе с таймаутом
|
||||
try:
|
||||
signal.alarm(HEALTH_CHECK_TIMEOUT)
|
||||
image_info = c.image.tags[0] if c.image and c.image.tags else c.image.short_id
|
||||
signal.alarm(0)
|
||||
except TimeoutError:
|
||||
print(f"⚠️ Таймаут при получении информации об образе для контейнера {c.name} (ID: {c.id[:12]})")
|
||||
image_info = "unknown"
|
||||
except Exception:
|
||||
image_info = "unknown"
|
||||
|
||||
items.append({
|
||||
# Базовая информация о контейнере (без health check)
|
||||
basic_info = {
|
||||
"id": c.id[:12],
|
||||
"name": c.name,
|
||||
"image": image_info,
|
||||
"status": c.status,
|
||||
"service": svc,
|
||||
"project": proj,
|
||||
"health": health_status,
|
||||
})
|
||||
"image": "unknown",
|
||||
"service": c.name,
|
||||
"project": None,
|
||||
"health": None,
|
||||
}
|
||||
|
||||
# Безопасно получаем метки
|
||||
try:
|
||||
labels = c.labels or {}
|
||||
basic_info["project"] = labels.get("com.docker.compose.project")
|
||||
basic_info["service"] = labels.get("com.docker.compose.service") or c.name
|
||||
except Exception:
|
||||
pass # Используем значения по умолчанию
|
||||
|
||||
# Безопасно получаем информацию об образе
|
||||
try:
|
||||
if c.image and c.image.tags:
|
||||
basic_info["image"] = c.image.tags[0]
|
||||
elif c.image:
|
||||
basic_info["image"] = c.image.short_id
|
||||
except Exception:
|
||||
pass # Оставляем "unknown"
|
||||
|
||||
# Фильтрация по проекту
|
||||
if project and basic_info["project"] != project:
|
||||
continue
|
||||
|
||||
# Добавляем контейнер в список
|
||||
items.append(basic_info)
|
||||
|
||||
except Exception as e:
|
||||
# Пропускаем контейнеры, которые вызывают ошибки
|
||||
# Пропускаем контейнеры с критическими ошибками
|
||||
print(f"⚠️ Пропускаем проблемный контейнер {c.name if hasattr(c, 'name') else 'unknown'} (ID: {c.id[:12]}): {e}")
|
||||
continue
|
||||
|
||||
signal.alarm(0) # Отменяем таймаут
|
||||
|
||||
except TimeoutError:
|
||||
print("❌ Таймаут при получении списка контейнеров")
|
||||
signal.alarm(0)
|
||||
return []
|
||||
except Exception as e:
|
||||
print(f"❌ Критическая ошибка при получении списка контейнеров: {e}")
|
||||
signal.alarm(0)
|
||||
print(f"❌ Ошибка получения списка контейнеров: {e}")
|
||||
return []
|
||||
|
||||
# Сортируем по проекту, сервису и имени
|
||||
items.sort(key=lambda x: (x.get("project") or "", x.get("service") or "", x.get("name") or ""))
|
||||
return items
|
||||
|
||||
@ -208,26 +167,35 @@ async def ws_logs(ws: WebSocket, container_id: str, tail: int = DEFAULT_TAIL, to
|
||||
await ws.close(); return
|
||||
|
||||
def find_by_id_prefix(prefix: str):
|
||||
for c in docker_client.containers.list(all=True):
|
||||
if c.id.startswith(prefix):
|
||||
return c
|
||||
"""Простой поиск контейнера по ID"""
|
||||
try:
|
||||
for c in docker_client.containers.list(all=True):
|
||||
if c.id.startswith(prefix):
|
||||
return c
|
||||
except Exception as e:
|
||||
print(f"❌ Ошибка поиска контейнера по ID {prefix}: {e}")
|
||||
return None
|
||||
|
||||
def find_by_service(service_name: str, project_name: Optional[str] = None):
|
||||
# pick the "newest" container of that compose service (optionally same project)
|
||||
found = []
|
||||
for c in docker_client.containers.list(all=True):
|
||||
lbl = c.labels or {}
|
||||
if lbl.get("com.docker.compose.service") == service_name and (project_name is None or lbl.get("com.docker.compose.project")==project_name):
|
||||
found.append(c)
|
||||
if not found:
|
||||
return None
|
||||
# sort by Created desc
|
||||
"""Простой поиск контейнера по сервису"""
|
||||
try:
|
||||
found.sort(key=lambda x: x.attrs.get("Created",""), reverse=True)
|
||||
except Exception:
|
||||
pass
|
||||
return found[0]
|
||||
found = []
|
||||
for c in docker_client.containers.list(all=True):
|
||||
try:
|
||||
lbl = c.labels or {}
|
||||
if lbl.get("com.docker.compose.service") == service_name and (project_name is None or lbl.get("com.docker.compose.project")==project_name):
|
||||
found.append(c)
|
||||
except Exception:
|
||||
continue # Пропускаем контейнеры с проблемными метками
|
||||
|
||||
if not found:
|
||||
return None
|
||||
|
||||
# Возвращаем первый найденный контейнер
|
||||
return found[0]
|
||||
except Exception as e:
|
||||
print(f"❌ Ошибка поиска контейнера по сервису {service_name}: {e}")
|
||||
return None
|
||||
|
||||
# initial resolve
|
||||
container = None
|
||||
@ -249,73 +217,38 @@ async def ws_logs(ws: WebSocket, container_id: str, tail: int = DEFAULT_TAIL, to
|
||||
svc_label = service
|
||||
proj_label = project
|
||||
|
||||
# streaming loop with reattach
|
||||
first_tail = tail
|
||||
# Упрощенная логика получения логов
|
||||
try:
|
||||
while True:
|
||||
if container is None and svc_label:
|
||||
container = find_by_service(svc_label, proj_label)
|
||||
# if still none, wait and try again
|
||||
if container is None:
|
||||
try:
|
||||
await asyncio.sleep(1.0)
|
||||
continue
|
||||
except Exception:
|
||||
break
|
||||
if container is None:
|
||||
await ws.send_text("ERROR: container not found")
|
||||
break
|
||||
if container is None:
|
||||
await ws.send_text("ERROR: container not found")
|
||||
return
|
||||
|
||||
try:
|
||||
# On first attach use requested tail; on reattach use tail=0 to avoid duplicate backlog
|
||||
use_tail = first_tail
|
||||
first_tail = 0
|
||||
stream = container.logs(stream=True, follow=True, tail=(use_tail if use_tail>0 else "all"))
|
||||
# stream loop
|
||||
for chunk in stream:
|
||||
if chunk is None:
|
||||
break
|
||||
try:
|
||||
await ws.send_text(chunk.decode(errors="ignore"))
|
||||
except RuntimeError:
|
||||
# client side closed
|
||||
stream.close()
|
||||
return
|
||||
# Normal EOF (container stopped or recreated). Try to re-resolve by service label.
|
||||
stream.close()
|
||||
# Re-resolve. If same ID and container stopped, wait; if new ID, reattach.
|
||||
old_id = container.id
|
||||
container = None
|
||||
# small backoff
|
||||
await asyncio.sleep(1.0)
|
||||
if svc_label:
|
||||
container = find_by_service(svc_label, proj_label)
|
||||
if container and container.id == old_id:
|
||||
# same container (probably stopped) — keep waiting until it comes back
|
||||
container = None
|
||||
await asyncio.sleep(1.0)
|
||||
continue
|
||||
# else: will loop and attach to new container
|
||||
continue
|
||||
else:
|
||||
# No service label -> break
|
||||
# Получаем логи контейнера
|
||||
try:
|
||||
stream = container.logs(stream=True, follow=True, tail=tail)
|
||||
|
||||
# Отправляем логи клиенту
|
||||
for chunk in stream:
|
||||
if chunk is None:
|
||||
break
|
||||
except WebSocketDisconnect:
|
||||
break
|
||||
except Exception as e:
|
||||
try:
|
||||
await ws.send_text(f"ERROR: {e}")
|
||||
await ws.send_text(chunk.decode(errors="ignore"))
|
||||
except Exception:
|
||||
pass
|
||||
# try re-resolve and continue
|
||||
container = None
|
||||
await asyncio.sleep(1.0)
|
||||
continue
|
||||
# Клиент отключился
|
||||
break
|
||||
|
||||
stream.close()
|
||||
|
||||
except Exception as e:
|
||||
await ws.send_text(f"ERROR: {e}")
|
||||
|
||||
except WebSocketDisconnect:
|
||||
pass
|
||||
pass # Клиент отключился
|
||||
except Exception as e:
|
||||
try: await ws.send_text(f"ERROR: {e}")
|
||||
except Exception: pass
|
||||
try:
|
||||
await ws.send_text(f"ERROR: {e}")
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
try:
|
||||
await ws.close()
|
||||
|
Loading…
x
Reference in New Issue
Block a user