Middleware that checks whether the server is currently unavailable (e.g., scaling or draining) and, if so, returns a 503 Service Unavailable response instead of forwarding the request.
Source code in vllm/entrypoints/serve/middleware.py
class ServiceUnavailableMiddleware:
    """
    ASGI middleware that answers HTTP requests with 503 Service Unavailable
    while the server is unavailable (e.g., scaling or draining).

    Non-HTTP scopes are always handed to the wrapped app unchanged. HTTP
    requests whose path is listed in ``_EXEMPT_PATHS`` are still forwarded
    even while ``is_rejecting_requests()`` reports true.
    """

    def __init__(self, app: ASGIApp) -> None:
        # The downstream ASGI application wrapped by this middleware.
        self.app = app

    @staticmethod
    async def _send_503(
        message: str, scope: Scope, receive: Receive, send: Send
    ) -> None:
        """Send a JSON ``{"error": message}`` body with status code 503."""
        rejection = JSONResponse(content={"error": message}, status_code=503)
        await rejection(scope, receive, send)

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        """
        Handle one ASGI connection.

        Forwards the call to the wrapped app unless requests are currently
        being rejected, in which case a 503 JSON response is sent instead.
        """
        # Only HTTP traffic is subject to rejection; everything else passes
        # straight through.
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        # Reject up front while the server is unavailable, unless the
        # requested path is exempt.
        if is_rejecting_requests():
            request_path = scope.get("path", "")
            if request_path not in _EXEMPT_PATHS:
                await self._send_503(
                    "Server is unavailable. Please try again later.",
                    scope,
                    receive,
                    send,
                )
                return

        try:
            await self.app(scope, receive, send)
        except asyncio.CancelledError:
            if is_rejecting_requests():
                # The request was cancelled while requests are being
                # rejected: swallow the cancellation and try to answer with
                # a 503. This is best-effort, so any failure (or a further
                # cancellation) while sending is deliberately ignored.
                try:
                    await self._send_503(
                        "Server is shutting down.", scope, receive, send
                    )
                except (Exception, asyncio.CancelledError):
                    pass
            else:
                # Cancellation unrelated to rejection: propagate as usual.
                raise