vllm.entrypoints.serve.middleware

ServiceUnavailableMiddleware

Middleware that checks if the server is currently unavailable (e.g., scaling or draining) and returns a 503 Service Unavailable.

Source code in vllm/entrypoints/serve/middleware.py

class ServiceUnavailableMiddleware:
    """
    ASGI middleware that answers HTTP requests with 503 Service Unavailable
    while the server is rejecting requests (e.g., scaling or draining).

    Only ``http`` scopes are gated; every other scope type is forwarded to
    the wrapped app unchanged. Paths listed in ``_EXEMPT_PATHS`` are never
    rejected up front.

    """

    def __init__(self, app: ASGIApp) -> None:
        # The wrapped ASGI application that receives every forwarded call.
        self.app = app

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        if scope["type"] == "http":
            await self._serve_http(scope, receive, send)
        else:
            # Non-HTTP scopes bypass the availability gate entirely.
            await self.app(scope, receive, send)

    @staticmethod
    async def _reply_unavailable(
        message: str, scope: Scope, receive: Receive, send: Send
    ) -> None:
        """Send a 503 JSON response whose body is ``{"error": message}``."""
        unavailable = JSONResponse(
            content={"error": message},
            status_code=503,
        )
        await unavailable(scope, receive, send)

    async def _serve_http(self, scope: Scope, receive: Receive, send: Send) -> None:
        """Gate a single HTTP request on the server's availability."""
        # Check availability first; the path is only looked up when the
        # server is actually rejecting requests.
        if is_rejecting_requests():
            request_path = scope.get("path", "")
            if request_path not in _EXEMPT_PATHS:
                await self._reply_unavailable(
                    "Server is unavailable. Please try again later.",
                    scope,
                    receive,
                    send,
                )
                return

        try:
            await self.app(scope, receive, send)
        except asyncio.CancelledError:
            # A cancellation that arrives while the server is still accepting
            # requests is not ours to handle: propagate it untouched.
            if not is_rejecting_requests():
                raise
            # Otherwise attempt a best-effort 503; any failure while sending
            # it (including a further cancellation) is deliberately ignored.
            try:
                await self._reply_unavailable(
                    "Server is shutting down.", scope, receive, send
                )
            except (Exception, asyncio.CancelledError):
                pass