Skip to content

vllm.profiler.wrapper

WorkerProfiler

Bases: ABC

Source code in vllm/profiler/wrapper.py
class WorkerProfiler(ABC):
    """Base class that sequences a profiler through start/step/stop requests.

    Subclasses implement ``_start`` and ``_stop``. This class decides when
    those hooks are invoked, based on two values read from the profiler
    config: a delay (number of ``step`` calls between ``start`` and the actual
    ``_start``) and a cap on the number of profiled steps.

    Two flags are tracked separately:

    * ``_active`` -- a ``start`` request has been accepted and no ``stop`` has
      been received since.
    * ``_running`` -- ``_start`` has succeeded and ``_stop`` has not yet been
      attempted since.
    """

    def __init__(self, profiler_config: ProfilerConfig) -> None:
        """Read the delay and step cap from the config and reset all state."""
        delay = profiler_config.delay_iterations
        self._delay_iters = delay
        if delay > 0:
            logger.info_once(
                f"GPU profiling will start {self._delay_iters} steps "
                "after start_profile."
            )

        cap = profiler_config.max_iterations
        self._max_iters = cap
        if cap > 0:
            logger.info_once(
                f"GPU profiling will stop after {self._max_iters} worker steps, "
                "or when stop_profile is received."
            )

        # "Active" bookkeeping: set by start(), cleared by stop().
        self._active = False
        self._active_iteration_count = 0

        # "Running" bookkeeping: reflects whether _start has actually succeeded.
        self._running = False
        self._profiling_for_iters = 0

    @abstractmethod
    def _start(self) -> None:
        """Start the underlying profiler (implemented by subclasses)."""

    @abstractmethod
    def _stop(self) -> None:
        """Stop the underlying profiler (implemented by subclasses)."""

    def _call_start(self) -> None:
        """Invoke ``_start``, logging a warning instead of raising on failure.

        Performs no state checks; ``_running`` becomes True only when
        ``_start`` returns normally.
        """
        try:
            self._start()
        except Exception as exc:
            logger.warning("Failed to start profiler: %s", exc)
        else:
            self._running = True

    def _call_stop(self) -> None:
        """Invoke ``_stop``, logging a warning instead of raising on failure.

        Performs no state checks; ``_running`` is cleared afterwards whether
        or not ``_stop`` succeeded.
        """
        try:
            self._stop()
            logger.info_once("Profiler stopped successfully.", scope="local")
        except Exception as exc:
            logger.warning("Failed to stop profiler: %s", exc)
        # A failed stop is still treated as stopped.
        self._running = False

    def start(self) -> None:
        """Accept a start request unless the profiler is already active.

        With no configured delay the profiler is started immediately;
        otherwise ``step`` starts it once the delay has elapsed.
        """
        if not self._active:
            self._active = True
            if self._delay_iters == 0:
                self._call_start()
            return

        logger.debug(
            "start_profile received when profiler "
            "is already active. Ignoring request."
        )

    def step(self) -> None:
        """Advance the per-step counters; apply delayed start and step cap.

        Does nothing unless the profiler is active.
        """
        if not self._active:
            return

        self._active_iteration_count += 1

        delay_elapsed = (
            self._delay_iters > 0
            and self._active_iteration_count == self._delay_iters
        )
        if delay_elapsed and not self._running:
            logger.info_once("Starting profiler after delay...", scope="local")
            self._call_start()

        if not self._running:
            return

        self._profiling_for_iters += 1

        if 0 < self._max_iters < self._profiling_for_iters:
            # Cap exceeded: stop the profiler now. It is marked as not
            # running but stays active, so a later stop() still resets the
            # counters.
            logger.info_once(
                "Max profiling iterations reached. Stopping profiler...", scope="local"
            )
            self._call_stop()

    def stop(self) -> None:
        """Accept a stop request: reset counters and stop a running profiler.

        The request is ignored (with a debug log) when the profiler is not
        active.
        """
        if not self._active:
            logger.debug(
                "stop_profile received when profiler is not active. Ignoring request."
            )
            return

        self._active_iteration_count = 0
        self._profiling_for_iters = 0
        self._active = False

        if self._running:
            self._call_stop()

    def shutdown(self) -> None:
        """Stop the profiler on shutdown if it is still running."""
        logger.info_once("Shutting down profiler", scope="local")
        if not self._running:
            return
        self.stop()

    def annotate_context_manager(self, name: str):
        """Return a context manager for annotating traces.

        This base implementation ignores ``name`` and returns a no-op
        ``nullcontext``.
        """
        return nullcontext()

_call_start

_call_start() -> None

Call _start, logging any exception as a warning instead of raising it. No state checks are performed; _running is set only if _start succeeds.

Source code in vllm/profiler/wrapper.py
def _call_start(self) -> None:
    """Invoke ``_start``, logging a warning instead of raising on failure.

    Performs no state checks; ``_running`` becomes True only when ``_start``
    returns normally.
    """
    try:
        self._start()
    except Exception as exc:
        logger.warning("Failed to start profiler: %s", exc)
    else:
        self._running = True

_call_stop

_call_stop() -> None

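Call _stop, logging any exception as a warning instead of raising it. No state checks are performed; _running is cleared whether or not _stop succeeds.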

Source code in vllm/profiler/wrapper.py
def _call_stop(self) -> None:
    """Invoke ``_stop``, logging a warning instead of raising on failure.

    Performs no state checks; ``_running`` is cleared afterwards whether or
    not ``_stop`` succeeded.
    """
    try:
        self._stop()
        logger.info_once("Profiler stopped successfully.", scope="local")
    except Exception as exc:
        logger.warning("Failed to stop profiler: %s", exc)
    # A failed stop is still treated as stopped.
    self._running = False

_start abstractmethod

_start() -> None

Start the profiler.

Source code in vllm/profiler/wrapper.py
@abstractmethod
def _start(self) -> None:
    """Start the underlying profiler (implemented by subclasses)."""

_stop abstractmethod

_stop() -> None

Stop the profiler.

Source code in vllm/profiler/wrapper.py
@abstractmethod
def _stop(self) -> None:
    """Stop the underlying profiler (implemented by subclasses)."""

annotate_context_manager

annotate_context_manager(name: str)

Return a context manager to annotate profiler traces.

Source code in vllm/profiler/wrapper.py
def annotate_context_manager(self, name: str):
    """Return a context manager for annotating traces.

    This implementation ignores ``name`` and returns a no-op ``nullcontext``.
    """
    noop = nullcontext()
    return noop

shutdown

shutdown() -> None

Ensure profiler is stopped when shutting down.

Source code in vllm/profiler/wrapper.py
def shutdown(self) -> None:
    """Stop the profiler on shutdown if it is still running."""
    logger.info_once("Shutting down profiler", scope="local")
    if not self._running:
        return
    self.stop()

start

start() -> None

Attempt to start the profiler, accounting for delayed starts.

Source code in vllm/profiler/wrapper.py
def start(self) -> None:
    """Accept a start request unless the profiler is already active.

    With no configured delay the profiler is started immediately; otherwise
    it is only marked active here.
    """
    if not self._active:
        self._active = True
        if self._delay_iters == 0:
            self._call_start()
        return

    logger.debug(
        "start_profile received when profiler "
        "is already active. Ignoring request."
    )

step

step() -> None

Update the profiler state at each worker step, to handle delayed starts and max iteration limits.

Source code in vllm/profiler/wrapper.py
def step(self) -> None:
    """Advance the per-step counters; apply delayed start and step cap.

    Does nothing unless the profiler is active.
    """
    if not self._active:
        return

    self._active_iteration_count += 1

    delay_elapsed = (
        self._delay_iters > 0
        and self._active_iteration_count == self._delay_iters
    )
    if delay_elapsed and not self._running:
        logger.info_once("Starting profiler after delay...", scope="local")
        self._call_start()

    if not self._running:
        return

    self._profiling_for_iters += 1

    if 0 < self._max_iters < self._profiling_for_iters:
        # Cap exceeded: stop the profiler now. It is marked as not running
        # but stays active, so a later stop() still resets the counters.
        logger.info_once(
            "Max profiling iterations reached. Stopping profiler...", scope="local"
        )
        self._call_stop()

stop

stop() -> None

Attempt to stop the profiler, accounting for overlapped calls.

Source code in vllm/profiler/wrapper.py
def stop(self) -> None:
    """Accept a stop request: reset counters and stop a running profiler.

    The request is ignored (with a debug log) when the profiler is not active.
    """
    if not self._active:
        logger.debug(
            "stop_profile received when profiler is not active. Ignoring request."
        )
        return

    self._active_iteration_count = 0
    self._profiling_for_iters = 0
    self._active = False

    if self._running:
        self._call_stop()