vllm.model_executor.models.interfaces_base ¶

T `module-attribute` ¶

# Invariant type variable (defaults to Tensor); parameterizes VllmModelForTextGeneration.
T = TypeVar('T', default=Tensor)

T_co `module-attribute` ¶

# Covariant type variable (defaults to Tensor); parameterizes VllmModel and VllmModelForPooling.
T_co = TypeVar('T_co', default=Tensor, covariant=True)

_T `module-attribute` ¶

# Type variable bound to Module classes; lets the class decorators in this module return the same class type they receive.
_T = TypeVar('_T', bound=type[Module])

logger `module-attribute` ¶

# Module-level logger; the interface-check helpers in this module use it to emit warnings.
logger = init_logger(__name__)

VllmModel ¶

Bases: Protocol[T_co]

The interface required for all models in vLLM.

Source code in vllm/model_executor/models/interfaces_base.py

@runtime_checkable
class VllmModel(Protocol[T_co]):
    """The interface required for all models in vLLM.

    This is a structural interface: the members below are signature-only
    stubs, and `@runtime_checkable` allows `isinstance` checks against it.
    """

    # Constructor signature: takes `vllm_config` and an optional string
    # `prefix` that defaults to "".
    def __init__(self, vllm_config: VllmConfig, prefix: str = "") -> None: ...

    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
        """Apply token embeddings to `input_ids`."""
        ...

    # Forward-pass signature: takes `input_ids` and `positions` tensors and
    # returns the protocol's type parameter `T_co`.
    def forward(self, input_ids: torch.Tensor, positions: torch.Tensor) -> T_co: ...

__init__ ¶

__init__(vllm_config: VllmConfig, prefix: str = '') -> None

Source code in vllm/model_executor/models/interfaces_base.py

# Protocol stub: declares the constructor signature (`vllm_config` plus an
# optional `prefix` defaulting to ""); there is no implementation body.
def __init__(self, vllm_config: VllmConfig, prefix: str = "") -> None: ...

embed_input_ids ¶

embed_input_ids(input_ids: Tensor) -> Tensor

Apply token embeddings to input_ids.

Source code in vllm/model_executor/models/interfaces_base.py

def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
    """Apply token embeddings to `input_ids`.

    Protocol stub: only the signature (tensor in, tensor out) is declared;
    the body is a bare `...`.
    """
    ...

forward ¶

forward(input_ids: Tensor, positions: Tensor) -> T_co

Source code in vllm/model_executor/models/interfaces_base.py

# Protocol stub: declares the forward-pass signature (`input_ids` and
# `positions` tensors in, the protocol's type parameter `T_co` out).
def forward(self, input_ids: torch.Tensor, positions: torch.Tensor) -> T_co: ...

VllmModelForPooling ¶

Bases: VllmModel[T_co], Protocol[T_co]

The interface required for all pooling models in vLLM.

Source code in vllm/model_executor/models/interfaces_base.py

@runtime_checkable
class VllmModelForPooling(VllmModel[T_co], Protocol[T_co]):
    """The interface required for all pooling models in vLLM.

    Extends `VllmModel` with class-level defaults (the pooling flag, the
    default sequence/token pooling types and the attention type) and a
    `pooler` attribute.
    """

    is_pooling_model: ClassVar[Literal[True]] = True
    """
    A flag that indicates this model supports pooling.

    Note:
        There is no need to redefine this flag if this class is in the
        MRO of your model class.
    """

    default_seq_pooling_type: ClassVar[SequencePoolingType] = "LAST"
    """
    Indicates the [vllm.config.pooler.PoolerConfig.seq_pooling_type][]
    to use by default.

    You can use the
    [vllm.model_executor.models.interfaces_base.default_pooling_type][]
    decorator to conveniently set this field.
    """

    default_tok_pooling_type: ClassVar[TokenPoolingType] = "ALL"
    """
    Indicates the [vllm.config.pooler.PoolerConfig.tok_pooling_type][]
    to use by default.

    You can use the
    [vllm.model_executor.models.interfaces_base.default_pooling_type][]
    decorator to conveniently set this field.
    """

    attn_type: ClassVar[AttnTypeStr] = "decoder"
    """
    Indicates the
    [vllm.config.model.ModelConfig.attn_type][]
    to use by default.

    You can use the
    [vllm.model_executor.models.interfaces_base.attn_type][]
    decorator to conveniently set this field.
    """

    pooler: Pooler
    """The pooler is only called on TP rank 0."""

attn_type `class-attribute` ¶

attn_type: AttnTypeStr = 'decoder'

Indicates the vllm.config.model.ModelConfig.attn_type to use by default.

You can use the vllm.model_executor.models.interfaces_base.attn_type decorator to conveniently set this field.

default_seq_pooling_type `class-attribute` ¶

default_seq_pooling_type: SequencePoolingType = 'LAST'

Indicates the vllm.config.pooler.PoolerConfig.seq_pooling_type to use by default.

You can use the vllm.model_executor.models.interfaces_base.default_pooling_type decorator to conveniently set this field.

default_tok_pooling_type `class-attribute` ¶

default_tok_pooling_type: TokenPoolingType = 'ALL'

Indicates the vllm.config.pooler.PoolerConfig.tok_pooling_type to use by default.

You can use the vllm.model_executor.models.interfaces_base.default_pooling_type decorator to conveniently set this field.

is_pooling_model `class-attribute` ¶

is_pooling_model: Literal[True] = True

A flag that indicates this model supports pooling.

Note

There is no need to redefine this flag if this class is in the MRO of your model class.

pooler `instance-attribute` ¶

pooler: Pooler

The pooler is only called on TP rank 0.

VllmModelForTextGeneration ¶

Bases: VllmModel[T], Protocol[T]

The interface required for all generative models in vLLM.

Source code in vllm/model_executor/models/interfaces_base.py

@runtime_checkable
class VllmModelForTextGeneration(VllmModel[T], Protocol[T]):
    """The interface required for all generative models in vLLM.

    Extends `VllmModel` with a single additional method, `compute_logits`.
    """

    def compute_logits(
        self,
        hidden_states: T,
    ) -> T | None:
        """Take `hidden_states` and return a value of the same type `T`.

        Return `None` if TP rank > 0.
        """
        ...

compute_logits ¶

compute_logits(hidden_states: T) -> T | None

Return None if TP rank > 0.

Source code in vllm/model_executor/models/interfaces_base.py

def compute_logits(
    self,
    hidden_states: T,
) -> T | None:
    """Take `hidden_states` and return a value of the same type `T`.

    Return `None` if TP rank > 0.

    Protocol stub: only the signature is declared; the body is `...`.
    """
    ...

_check_vllm_model_embed_input_ids ¶

_check_vllm_model_embed_input_ids(
    model: type[object] | object,
) -> bool

Source code in vllm/model_executor/models/interfaces_base.py

def _check_vllm_model_embed_input_ids(model: type[object] | object) -> bool:
    """Report whether `model` exposes a callable `embed_input_ids`.

    Args:
        model: A model class or a model instance.

    Returns:
        `True` when the `embed_input_ids` attribute exists and is callable.
        Otherwise a warning naming the model is logged and `False` is
        returned.
    """
    embed_fn = getattr(model, "embed_input_ids", None)
    if callable(embed_fn):
        return True

    logger.warning(
        "The model (%s) is missing the `embed_input_ids` method.",
        model,
    )
    return False

_check_vllm_model_forward ¶

_check_vllm_model_forward(
    model: type[object] | object,
) -> bool

Source code in vllm/model_executor/models/interfaces_base.py

def _check_vllm_model_forward(model: type[object] | object) -> bool:
    """Report whether `model.forward` accepts the vLLM-specific keywords.

    Args:
        model: A model class or a model instance.

    Returns:
        `False` when `forward` is absent or not callable, or when
        `supports_kw` reports that it lacks `input_ids` or `positions`;
        `True` otherwise.
    """
    forward_fn = getattr(model, "forward", None)
    if not callable(forward_fn):
        return False

    required_kws = ("input_ids", "positions")
    absent_kws = tuple(
        name for name in required_kws if not supports_kw(forward_fn, name)
    )
    if not absent_kws:
        return True

    # The warning is only emitted when `model` is a class deriving from
    # nn.Module; instances and other classes fail the check silently.
    if isinstance(model, type) and issubclass(model, nn.Module):
        logger.warning(
            "The model (%s) is missing "
            "vLLM-specific keywords from its `forward` method: %s",
            model,
            absent_kws,
        )

    return False

_check_vllm_model_init ¶

_check_vllm_model_init(
    model: type[object] | object,
) -> bool

Source code in vllm/model_executor/models/interfaces_base.py

def _check_vllm_model_init(model: type[object] | object) -> bool:
    """Report whether `model.__init__` accepts a `vllm_config` keyword.

    The decision is delegated to `supports_kw`.
    """
    return supports_kw(model.__init__, "vllm_config")

attn_type ¶

attn_type(attn_type: AttnTypeStr)

Decorator to set VllmModelForPooling.attn_type.

Source code in vllm/model_executor/models/interfaces_base.py

def attn_type(attn_type: AttnTypeStr):
    """Decorator to set `VllmModelForPooling.attn_type`.

    Args:
        attn_type: The value to store on the decorated class.

    Returns:
        A class decorator that assigns `attn_type` to the class attribute of
        the same name and hands the same class object back.
    """

    def decorate(model: _T) -> _T:
        setattr(model, "attn_type", attn_type)
        return model

    return decorate

default_pooling_type ¶

default_pooling_type(
    *,
    seq_pooling_type: SequencePoolingType = "LAST",
    tok_pooling_type: TokenPoolingType = "ALL",
)

Decorator to set VllmModelForPooling.default_*_pooling_type.

Source code in vllm/model_executor/models/interfaces_base.py

def default_pooling_type(
    *,
    seq_pooling_type: SequencePoolingType = "LAST",
    tok_pooling_type: TokenPoolingType = "ALL",
):
    """Decorator to set `VllmModelForPooling.default_*_pooling_type`.

    Args:
        seq_pooling_type: Value stored as `default_seq_pooling_type`.
        tok_pooling_type: Value stored as `default_tok_pooling_type`.

    Returns:
        A class decorator that assigns both class attributes and hands the
        same class object back.
    """
    defaults = {
        "default_seq_pooling_type": seq_pooling_type,
        "default_tok_pooling_type": tok_pooling_type,
    }

    def decorate(model: _T) -> _T:
        for field_name, value in defaults.items():
            setattr(model, field_name, value)
        return model

    return decorate

get_attn_type ¶

get_attn_type(model: type[object] | object) -> AttnTypeStr

Source code in vllm/model_executor/models/interfaces_base.py

def get_attn_type(model: type[object] | object) -> AttnTypeStr:
    """Return the `attn_type` attribute of `model`, or "decoder" if absent."""
    try:
        return model.attn_type  # type: ignore[union-attr]
    except AttributeError:
        return "decoder"

get_default_seq_pooling_type ¶

get_default_seq_pooling_type(
    model: type[object] | object,
) -> SequencePoolingType

Source code in vllm/model_executor/models/interfaces_base.py

def get_default_seq_pooling_type(
    model: type[object] | object,
) -> SequencePoolingType:
    """Return `model.default_seq_pooling_type`, or "LAST" if it is absent."""
    fallback = "LAST"
    return getattr(model, "default_seq_pooling_type", fallback)

get_default_tok_pooling_type ¶

get_default_tok_pooling_type(
    model: type[object] | object,
) -> TokenPoolingType

Source code in vllm/model_executor/models/interfaces_base.py

def get_default_tok_pooling_type(
    model: type[object] | object,
) -> TokenPoolingType:
    """Return `model.default_tok_pooling_type`, or "ALL" if it is absent."""
    fallback = "ALL"
    return getattr(model, "default_tok_pooling_type", fallback)

is_pooling_model ¶

is_pooling_model(
    model: type[object],
) -> TypeIs[type[VllmModelForPooling]]

is_pooling_model(
    model: object,
) -> TypeIs[VllmModelForPooling]

is_pooling_model(
    model: type[object] | object,
) -> (
    TypeIs[type[VllmModelForPooling]]
    | TypeIs[VllmModelForPooling]
)

Source code in vllm/model_executor/models/interfaces_base.py

def is_pooling_model(
    model: type[object] | object,
) -> TypeIs[type[VllmModelForPooling]] | TypeIs[VllmModelForPooling]:
    """Report whether `model` is a vLLM model flagged as a pooling model.

    Args:
        model: A model class or a model instance.

    Returns:
        `False` when `is_vllm_model(model)` fails; otherwise the value of the
        model's `is_pooling_model` attribute, or `False` if the attribute is
        absent.
    """
    if is_vllm_model(model):
        return getattr(model, "is_pooling_model", False)

    return False

is_text_generation_model ¶

is_text_generation_model(
    model: type[object],
) -> TypeIs[type[VllmModelForTextGeneration]]

is_text_generation_model(
    model: object,
) -> TypeIs[VllmModelForTextGeneration]

is_text_generation_model(
    model: type[object] | object,
) -> (
    TypeIs[type[VllmModelForTextGeneration]]
    | TypeIs[VllmModelForTextGeneration]
)

Source code in vllm/model_executor/models/interfaces_base.py

def is_text_generation_model(
    model: type[object] | object,
) -> TypeIs[type[VllmModelForTextGeneration]] | TypeIs[VllmModelForTextGeneration]:
    """Report whether `model` satisfies `VllmModelForTextGeneration`.

    Args:
        model: A model class or a model instance.

    Returns:
        `False` when `is_vllm_model(model)` fails; otherwise the result of a
        runtime `isinstance` check against `VllmModelForTextGeneration`.
    """
    if not is_vllm_model(model):
        return False

    # The same `isinstance` check is applied to classes and instances alike
    # (a class is passed as the object being inspected, not via `issubclass`),
    # so no `isinstance(model, type)` branch is needed.
    return isinstance(model, VllmModelForTextGeneration)

is_vllm_model ¶

is_vllm_model(
    model: type[object],
) -> TypeIs[type[VllmModel]]

is_vllm_model(model: object) -> TypeIs[VllmModel]

is_vllm_model(
    model: type[object] | object,
) -> TypeIs[type[VllmModel]] | TypeIs[VllmModel]

Source code in vllm/model_executor/models/interfaces_base.py

def is_vllm_model(
    model: type[object] | object,
) -> TypeIs[type[VllmModel]] | TypeIs[VllmModel]:
    """Report whether `model` passes all three vLLM interface checks.

    The checks run in a fixed order (`__init__`, then `embed_input_ids`,
    then `forward`) and stop at the first failure, so later checks and any
    warnings they would log are skipped once one fails.
    """
    if not _check_vllm_model_init(model):
        return False
    if not _check_vllm_model_embed_input_ids(model):
        return False
    return _check_vllm_model_forward(model)

vllm.model_executor.models.interfaces_base ¶

T module-attribute ¶

T_co module-attribute ¶

_T module-attribute ¶

logger module-attribute ¶

VllmModel ¶

__init__ ¶

embed_input_ids ¶

forward ¶

VllmModelForPooling ¶

attn_type class-attribute ¶

default_seq_pooling_type class-attribute ¶

default_tok_pooling_type class-attribute ¶

is_pooling_model class-attribute ¶

pooler instance-attribute ¶

VllmModelForTextGeneration ¶

compute_logits ¶

_check_vllm_model_embed_input_ids ¶

_check_vllm_model_forward ¶

_check_vllm_model_init ¶

attn_type ¶

default_pooling_type ¶

get_attn_type ¶

get_default_seq_pooling_type ¶

get_default_tok_pooling_type ¶

is_pooling_model ¶

is_text_generation_model ¶

is_vllm_model ¶

T `module-attribute` ¶

T_co `module-attribute` ¶

_T `module-attribute` ¶

logger `module-attribute` ¶

__init__ ¶

attn_type `class-attribute` ¶

default_seq_pooling_type `class-attribute` ¶

default_tok_pooling_type `class-attribute` ¶

is_pooling_model `class-attribute` ¶

pooler `instance-attribute` ¶