Skip to content

vllm.sequence

Sequence and its related classes.

IntermediateTensors dataclass

For all pipeline stages except the last, we need to return the hidden states and residuals to be sent to the next stage. This data structure contains the hidden states and residuals for a request.

Each stage also needs to handle its own kv_connector_output.

Source code in vllm/sequence.py
@dataclass
class IntermediateTensors:
    """For all pipeline stages except the last, we need to return the hidden
    states and residuals to be sent to the next stage. This data structure
    contains the hidden states and residuals for a request.

    Each stage also needs to handle its own kv_connector_output.
    """

    tensors: dict[str, torch.Tensor]
    kv_connector_output: KVConnectorOutput | None

    def __init__(
        self,
        tensors: dict[str, torch.Tensor],
        kv_connector_output: KVConnectorOutput | None = None,
    ) -> None:
        # manually define this function, so that
        # Dynamo knows `IntermediateTensors()` comes from this file.
        # Otherwise, dataclass will generate this function by evaluating
        # a string, and we will lose the information about the source file.
        self.tensors = tensors
        self.kv_connector_output = kv_connector_output

    def __getitem__(self, key: str | slice):
        if isinstance(key, str):
            return self.tensors[key]
        elif isinstance(key, slice):
            return self.__class__({k: v[key] for k, v in self.tensors.items()})

    def __setitem__(self, key: str, value: torch.Tensor):
        self.tensors[key] = value

    def items(self):
        return self.tensors.items()

    def __len__(self):
        return len(self.tensors)

    def __eq__(self, other: object):
        if not isinstance(other, self.__class__):
            return False
        if self.tensors.keys() != other.tensors.keys():
            return False
        return all(torch.equal(self.tensors[k], other.tensors[k]) for k in self.tensors)

    def __repr__(self) -> str:
        return f"IntermediateTensors(tensors={self.tensors})"

kv_connector_output instance-attribute

kv_connector_output: KVConnectorOutput | None = (
    kv_connector_output
)

tensors instance-attribute

tensors: dict[str, Tensor] = tensors

__eq__

__eq__(other: object)
Source code in vllm/sequence.py
def __eq__(self, other: object):
    if not isinstance(other, self.__class__):
        return False
    if self.tensors.keys() != other.tensors.keys():
        return False
    return all(torch.equal(self.tensors[k], other.tensors[k]) for k in self.tensors)

__getitem__

__getitem__(key: str | slice)
Source code in vllm/sequence.py
def __getitem__(self, key: str | slice):
    if isinstance(key, str):
        return self.tensors[key]
    elif isinstance(key, slice):
        return self.__class__({k: v[key] for k, v in self.tensors.items()})

__init__

__init__(
    tensors: dict[str, Tensor],
    kv_connector_output: KVConnectorOutput | None = None,
) -> None
Source code in vllm/sequence.py
def __init__(
    self,
    tensors: dict[str, torch.Tensor],
    kv_connector_output: KVConnectorOutput | None = None,
) -> None:
    # manually define this function, so that
    # Dynamo knows `IntermediateTensors()` comes from this file.
    # Otherwise, dataclass will generate this function by evaluating
    # a string, and we will lose the information about the source file.
    self.tensors = tensors
    self.kv_connector_output = kv_connector_output

__len__

__len__()
Source code in vllm/sequence.py
def __len__(self):
    return len(self.tensors)

__repr__

__repr__() -> str
Source code in vllm/sequence.py
def __repr__(self) -> str:
    return f"IntermediateTensors(tensors={self.tensors})"

__setitem__

__setitem__(key: str, value: Tensor)
Source code in vllm/sequence.py
def __setitem__(self, key: str, value: torch.Tensor):
    self.tensors[key] = value

items

items()
Source code in vllm/sequence.py
def items(self):
    """Return the ``(name, tensor)`` items view of the stored mapping."""
    pairs = self.tensors.items()
    return pairs