vllm.model_executor.layers.pooler.tokwise.heads

TokenPoolerHeadOutputItem module-attribute

TokenPoolerHeadOutputItem: TypeAlias = Tensor | None
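
An item is None when its chunk belongs to an unfinished chunked prefill (see forward_chunk below); otherwise it is a tensor of per-token head outputs.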

TokenClassifierPoolerHead

Bases: TokenPoolerHead

Source code in vllm/model_executor/layers/pooler/tokwise/heads.py
class TokenClassifierPoolerHead(TokenPoolerHead):
    def __init__(
        self,
        classifier: ClassifierFn | None = None,
        logit_bias: float | None = None,
        head_dtype: torch.dtype | str | None = None,
        activation: ActivationFn | None = None,
    ) -> None:
        super().__init__()

        self.classifier = classifier
        self.logit_bias = logit_bias
        self.head_dtype = head_dtype
        self.activation = activation

    def get_supported_tasks(self) -> Set[PoolingTask]:
        return {"token_classify"}

    def forward_chunk(
        self,
        pooled_data: TokenPoolingMethodOutputItem,
        pooling_param: PoolingParams,
    ) -> TokenPoolerHeadOutputItem:
        # for unfinished chunked prefill
        if pooled_data is None:
            return None

        if self.head_dtype is not None:
            pooled_data = pooled_data.to(self.head_dtype)
        # pooled_data shape: [n_tokens, hidden_size]

        if self.classifier is not None:
            scores = self.classifier(pooled_data)
        else:
            scores = pooled_data
        # scores shape: [n_tokens, num_labels]

        if self.logit_bias is not None:
            scores -= self.logit_bias

        if self.activation is not None and pooling_param.use_activation:
            scores = self.activation(scores)

        # scores shape: [n_tokens, num_labels]
        return scores
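
Example (a minimal sketch, not part of the vLLM source): driving forward_chunk directly with a toy linear classifier. SimpleNamespace stands in for PoolingParams here; use_activation is the only field this method reads.

from types import SimpleNamespace

import torch

from vllm.model_executor.layers.pooler.tokwise.heads import (
    TokenClassifierPoolerHead,
)

hidden_size, num_labels, n_tokens = 16, 3, 5

head = TokenClassifierPoolerHead(
    classifier=torch.nn.Linear(hidden_size, num_labels),  # a ClassifierFn
    activation=torch.nn.Softmax(dim=-1),                  # an ActivationFn
    head_dtype=torch.float32,
)

chunk = torch.randn(n_tokens, hidden_size)    # one request's hidden states
param = SimpleNamespace(use_activation=True)  # stand-in for PoolingParams

scores = head.forward_chunk(chunk, param)     # shape: [n_tokens, num_labels]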

activation instance-attribute

activation = activation

classifier instance-attribute

classifier = classifier

head_dtype instance-attribute

head_dtype = head_dtype

logit_bias instance-attribute

logit_bias = logit_bias

__init__

__init__(
    classifier: ClassifierFn | None = None,
    logit_bias: float | None = None,
    head_dtype: dtype | str | None = None,
    activation: ActivationFn | None = None,
) -> None
Source code in vllm/model_executor/layers/pooler/tokwise/heads.py
def __init__(
    self,
    classifier: ClassifierFn | None = None,
    logit_bias: float | None = None,
    head_dtype: torch.dtype | str | None = None,
    activation: ActivationFn | None = None,
) -> None:
    super().__init__()

    self.classifier = classifier
    self.logit_bias = logit_bias
    self.head_dtype = head_dtype
    self.activation = activation

forward_chunk

forward_chunk(
    pooled_data: TokenPoolingMethodOutputItem,
    pooling_param: PoolingParams,
) -> TokenPoolerHeadOutputItem
Source code in vllm/model_executor/layers/pooler/tokwise/heads.py
def forward_chunk(
    self,
    pooled_data: TokenPoolingMethodOutputItem,
    pooling_param: PoolingParams,
) -> TokenPoolerHeadOutputItem:
    # for unfinished chunked prefill
    if pooled_data is None:
        return None

    if self.head_dtype is not None:
        pooled_data = pooled_data.to(self.head_dtype)
    # pooled_data shape: [n_tokens, hidden_size]

    if self.classifier is not None:
        scores = self.classifier(pooled_data)
    else:
        scores = pooled_data
    # scores shape: [n_tokens, num_labels]

    if self.logit_bias is not None:
        scores -= self.logit_bias

    if self.activation is not None and pooling_param.use_activation:
        scores = self.activation(scores)

    # scores shape: [n_tokens, num_labels]
    return scores

get_supported_tasks

get_supported_tasks() -> Set[PoolingTask]
Source code in vllm/model_executor/layers/pooler/tokwise/heads.py
def get_supported_tasks(self) -> Set[PoolingTask]:
    return {"token_classify"}

TokenEmbeddingPoolerHead

Bases: TokenPoolerHead

Source code in vllm/model_executor/layers/pooler/tokwise/heads.py
class TokenEmbeddingPoolerHead(TokenPoolerHead):
    def __init__(
        self,
        head_dtype: torch.dtype | str | None = None,
        projector: ProjectorFn | None = None,
        activation: ActivationFn | None = None,
    ) -> None:
        super().__init__()

        self.head_dtype = head_dtype
        self.projector = projector
        self.activation = activation

    def get_supported_tasks(self) -> Set[PoolingTask]:
        return {"token_embed"}

    def forward_chunk(
        self,
        pooled_data: TokenPoolingMethodOutputItem,
        pooling_param: PoolingParams,
    ) -> TokenPoolerHeadOutputItem:
        # for unfinished chunked prefill
        if pooled_data is None:
            return None

        if self.head_dtype is not None:
            pooled_data = pooled_data.to(self.head_dtype)
        # pooled_data shape: [n_tokens, hidden_dimension]

        # Apply ST projector
        if self.projector is not None:
            pooled_data = self.projector(pooled_data)
        # pooled_data shape: [n_tokens, embedding_dimension]

        # for matryoshka representation
        pooled_data = pooled_data[..., : pooling_param.dimensions]

        # for normalization
        if self.activation is not None and pooling_param.use_activation:
            pooled_data = self.activation(pooled_data)

        # pooled_data shape: [n_tokens, embedding_dimension]
        return pooled_data
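
Example (a minimal sketch, not part of the vLLM source): Matryoshka truncation plus L2 normalization. SimpleNamespace stands in for PoolingParams; dimensions and use_activation are the only fields read, and the slice pooled_data[..., :dimensions] is a no-op when dimensions is None.

from types import SimpleNamespace

import torch
import torch.nn.functional as F

from vllm.model_executor.layers.pooler.tokwise.heads import (
    TokenEmbeddingPoolerHead,
)

head = TokenEmbeddingPoolerHead(
    head_dtype=torch.float32,
    projector=None,                                     # no ST projector here
    activation=lambda t: F.normalize(t, p=2, dim=-1),   # an ActivationFn
)

chunk = torch.randn(4, 8)                               # [n_tokens, hidden_dimension]
param = SimpleNamespace(dimensions=4, use_activation=True)  # stand-in for PoolingParams

emb = head.forward_chunk(chunk, param)  # shape [4, 4], each row unit-norm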

activation instance-attribute

activation = activation

head_dtype instance-attribute

head_dtype = head_dtype

projector instance-attribute

projector = projector

__init__

__init__(
    head_dtype: dtype | str | None = None,
    projector: ProjectorFn | None = None,
    activation: ActivationFn | None = None,
) -> None
Source code in vllm/model_executor/layers/pooler/tokwise/heads.py
def __init__(
    self,
    head_dtype: torch.dtype | str | None = None,
    projector: ProjectorFn | None = None,
    activation: ActivationFn | None = None,
) -> None:
    super().__init__()

    self.head_dtype = head_dtype
    self.projector = projector
    self.activation = activation

forward_chunk

forward_chunk(
    pooled_data: TokenPoolingMethodOutputItem,
    pooling_param: PoolingParams,
) -> TokenPoolerHeadOutputItem
Source code in vllm/model_executor/layers/pooler/tokwise/heads.py
def forward_chunk(
    self,
    pooled_data: TokenPoolingMethodOutputItem,
    pooling_param: PoolingParams,
) -> TokenPoolerHeadOutputItem:
    # for unfinished chunked prefill
    if pooled_data is None:
        return None

    if self.head_dtype is not None:
        pooled_data = pooled_data.to(self.head_dtype)
    # pooled_data shape: [n_tokens, hidden_dimension]

    # Apply ST projector
    if self.projector is not None:
        pooled_data = self.projector(pooled_data)
    # pooled_data shape: [n_tokens, embedding_dimension]

    # for matryoshka representation
    pooled_data = pooled_data[..., : pooling_param.dimensions]

    # for normalization
    if self.activation is not None and pooling_param.use_activation:
        pooled_data = self.activation(pooled_data)

    # pooled_data shape: [n_tokens, embedding_dimension]
    return pooled_data

get_supported_tasks

get_supported_tasks() -> Set[PoolingTask]
Source code in vllm/model_executor/layers/pooler/tokwise/heads.py
def get_supported_tasks(self) -> Set[PoolingTask]:
    return {"token_embed"}

TokenPoolerHead

Bases: Module, ABC

Source code in vllm/model_executor/layers/pooler/tokwise/heads.py
class TokenPoolerHead(nn.Module, ABC):
    @abstractmethod
    def get_supported_tasks(self) -> Set[PoolingTask]:
        raise NotImplementedError

    @abstractmethod
    def forward_chunk(
        self,
        pooled_data: TokenPoolingMethodOutputItem,
        pooling_param: PoolingParams,
    ) -> TokenPoolerHeadOutputItem:
        raise NotImplementedError

    def forward(
        self,
        pooled_data: list[TokenPoolingMethodOutputItem],
        pooling_metadata: PoolingMetadata,
    ) -> list[TokenPoolerHeadOutputItem]:
        pooling_params = pooling_metadata.pooling_params
        assert len(pooled_data) == len(pooling_params)

        return [self.forward_chunk(d, p) for d, p in zip(pooled_data, pooling_params)]
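
Example (an assumption, not part of the vLLM source): a minimal pass-through subclass showing the TokenPoolerHead contract. forward pairs each request's pooled chunk with its PoolingParams and delegates to forward_chunk; a None chunk (unfinished chunked prefill) must come back as None. SimpleNamespace stands in for PoolingParams and PoolingMetadata.

from types import SimpleNamespace

import torch

from vllm.model_executor.layers.pooler.tokwise.heads import TokenPoolerHead

class IdentityTokenHead(TokenPoolerHead):  # hypothetical subclass
    def get_supported_tasks(self):
        return {"token_embed"}

    def forward_chunk(self, pooled_data, pooling_param):
        # None stays None; tensors pass through unchanged
        return pooled_data

head = IdentityTokenHead()
meta = SimpleNamespace(pooling_params=[SimpleNamespace(), SimpleNamespace()])
out = head.forward([torch.randn(3, 8), None], meta)
assert out[1] is None  # unfinished chunked prefill propagates as None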

forward

forward(
    pooled_data: list[TokenPoolingMethodOutputItem],
    pooling_metadata: PoolingMetadata,
) -> list[TokenPoolerHeadOutputItem]
Source code in vllm/model_executor/layers/pooler/tokwise/heads.py
def forward(
    self,
    pooled_data: list[TokenPoolingMethodOutputItem],
    pooling_metadata: PoolingMetadata,
) -> list[TokenPoolerHeadOutputItem]:
    pooling_params = pooling_metadata.pooling_params
    assert len(pooled_data) == len(pooling_params)

    return [self.forward_chunk(d, p) for d, p in zip(pooled_data, pooling_params)]

forward_chunk abstractmethod

forward_chunk(
    pooled_data: TokenPoolingMethodOutputItem,
    pooling_param: PoolingParams,
) -> TokenPoolerHeadOutputItem
Source code in vllm/model_executor/layers/pooler/tokwise/heads.py
@abstractmethod
def forward_chunk(
    self,
    pooled_data: TokenPoolingMethodOutputItem,
    pooling_param: PoolingParams,
) -> TokenPoolerHeadOutputItem:
    raise NotImplementedError

get_supported_tasks abstractmethod

get_supported_tasks() -> Set[PoolingTask]
Source code in vllm/model_executor/layers/pooler/tokwise/heads.py
@abstractmethod
def get_supported_tasks(self) -> Set[PoolingTask]:
    raise NotImplementedError