vllm.entrypoints.serve.tokenize.protocol

TokenizeRequest module-attribute
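
A minimal sketch of this alias, assuming it is the union of the completion- and chat-style request models documented below:

TokenizeRequest = TokenizeCompletionRequest | TokenizeChatRequest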

DetokenizeRequest

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/serve/tokenize/protocol.py
class DetokenizeRequest(OpenAIBaseModel):
    model: str | None = None
    tokens: list[int]

model class-attribute instance-attribute

model: str | None = None

tokens instance-attribute

tokens: list[int]

DetokenizeResponse

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/serve/tokenize/protocol.py
class DetokenizeResponse(OpenAIBaseModel):
    prompt: str

prompt instance-attribute

prompt: str
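
A hedged usage sketch for the detokenize pair: build a DetokenizeRequest, serialize it to the JSON body a detokenize endpoint would consume, and read the decoded text back from a DetokenizeResponse. The token ids and the round-trip pairing are illustrative; the import path follows this page.

from vllm.entrypoints.serve.tokenize.protocol import (
    DetokenizeRequest,
    DetokenizeResponse,
)

# Request body; token ids are illustrative.
req = DetokenizeRequest(tokens=[101, 2023, 2003, 102])
print(req.model_dump_json())

# The decoded text comes back wrapped in DetokenizeResponse.
resp = DetokenizeResponse(prompt="this is")
print(resp.prompt)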

TokenizeChatRequest

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/serve/tokenize/protocol.py
class TokenizeChatRequest(OpenAIBaseModel):
    model: str | None = None
    messages: list[ChatCompletionMessageParam]

    add_generation_prompt: bool = Field(
        default=True,
        description=(
            "If true, the generation prompt will be added to the chat template. "
            "This is a parameter used by chat template in tokenizer config of the "
            "model."
        ),
    )
    return_token_strs: bool | None = Field(
        default=False,
        description=(
            "If true, also return the token strings corresponding to the token ids."
        ),
    )
    continue_final_message: bool = Field(
        default=False,
        description=(
            "If this is set, the chat will be formatted so that the final "
            "message in the chat is open-ended, without any EOS tokens. The "
            "model will continue this message rather than starting a new one. "
            'This allows you to "prefill" part of the model\'s response for it. '
            "Cannot be used at the same time as `add_generation_prompt`."
        ),
    )
    add_special_tokens: bool = Field(
        default=False,
        description=(
            "If true, special tokens (e.g. BOS) will be added to the prompt "
            "on top of what is added by the chat template. "
            "For most models, the chat template takes care of adding the "
            "special tokens so this should be set to false (as is the "
            "default)."
        ),
    )
    chat_template: str | None = Field(
        default=None,
        description=(
            "A Jinja template to use for this conversion. "
            "As of transformers v4.44, default chat template is no longer "
            "allowed, so you must provide a chat template if the tokenizer "
            "does not define one."
        ),
    )
    chat_template_kwargs: dict[str, Any] | None = Field(
        default=None,
        description=(
            "Additional keyword args to pass to the template renderer. "
            "Will be accessible by the chat template."
        ),
    )
    mm_processor_kwargs: dict[str, Any] | None = Field(
        default=None,
        description=("Additional kwargs to pass to the HF processor."),
    )
    tools: list[ChatCompletionToolsParam] | None = Field(
        default=None,
        description=("A list of tools the model may call."),
    )

    @model_validator(mode="before")
    @classmethod
    def check_generation_prompt(cls, data):
        if data.get("continue_final_message") and data.get("add_generation_prompt"):
            raise ValueError(
                "Cannot set both `continue_final_message` and "
                "`add_generation_prompt` to True."
            )
        return data
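
A hedged construction example: the message dict follows the OpenAI chat format that ChatCompletionMessageParam describes, and all field values are illustrative.

from vllm.entrypoints.serve.tokenize.protocol import TokenizeChatRequest

req = TokenizeChatRequest(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    add_generation_prompt=True,  # default: end the rendered chat with the assistant header
    return_token_strs=True,      # also return per-token strings alongside the ids
    chat_template_kwargs={"enable_thinking": False},  # illustrative extra template kwarg
)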

add_generation_prompt class-attribute instance-attribute

add_generation_prompt: bool = Field(
    default=True,
    description="If true, the generation prompt will be added to the chat template. This is a parameter used by chat template in tokenizer config of the model.",
)

add_special_tokens class-attribute instance-attribute

add_special_tokens: bool = Field(
    default=False,
    description="If true, special tokens (e.g. BOS) will be added to the prompt on top of what is added by the chat template. For most models, the chat template takes care of adding the special tokens so this should be set to false (as is the default).",
)

chat_template class-attribute instance-attribute

chat_template: str | None = Field(
    default=None,
    description="A Jinja template to use for this conversion. As of transformers v4.44, default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one.",
)

chat_template_kwargs class-attribute instance-attribute

chat_template_kwargs: dict[str, Any] | None = Field(
    default=None,
    description="Additional keyword args to pass to the template renderer. Will be accessible by the chat template.",
)

continue_final_message class-attribute instance-attribute

continue_final_message: bool = Field(
    default=False,
    description='If this is set, the chat will be formatted so that the final message in the chat is open-ended, without any EOS tokens. The model will continue this message rather than starting a new one. This allows you to "prefill" part of the model\'s response for it. Cannot be used at the same time as `add_generation_prompt`.',
)

messages instance-attribute

messages: list[ChatCompletionMessageParam]

mm_processor_kwargs class-attribute instance-attribute

mm_processor_kwargs: dict[str, Any] | None = Field(
    default=None,
    description="Additional kwargs to pass to the HF processor.",
)

model class-attribute instance-attribute

model: str | None = None

return_token_strs class-attribute instance-attribute

return_token_strs: bool | None = Field(
    default=False,
    description="If true, also return the token strings corresponding to the token ids.",
)

tools class-attribute instance-attribute

tools: list[ChatCompletionToolsParam] | None = Field(
    default=None,
    description="A list of tools the model may call.",
)

check_generation_prompt classmethod

check_generation_prompt(data)
Source code in vllm/entrypoints/serve/tokenize/protocol.py
@model_validator(mode="before")
@classmethod
def check_generation_prompt(cls, data):
    if data.get("continue_final_message") and data.get("add_generation_prompt"):
        raise ValueError(
            "Cannot set both `continue_final_message` and "
            "`add_generation_prompt` to True."
        )
    return data
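
The validator runs in mode="before", so it inspects the raw input dict: passing both flags as True is rejected, while relying on the add_generation_prompt default does not trigger the check. A brief sketch (message content illustrative):

from pydantic import ValidationError

from vllm.entrypoints.serve.tokenize.protocol import TokenizeChatRequest

try:
    TokenizeChatRequest(
        messages=[{"role": "assistant", "content": "The answer is"}],
        add_generation_prompt=True,
        continue_final_message=True,
    )
except ValidationError as exc:
    print(exc)  # reports that both flags cannot be True at the same time

# Only explicitly passed keys are visible to the "before" validator, so this is
# accepted even though the add_generation_prompt field defaults to True.
ok = TokenizeChatRequest(
    messages=[{"role": "assistant", "content": "The answer is"}],
    continue_final_message=True,
)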

TokenizeCompletionRequest

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/serve/tokenize/protocol.py
class TokenizeCompletionRequest(OpenAIBaseModel):
    model: str | None = None
    prompt: str

    add_special_tokens: bool = Field(
        default=True,
        description=(
            "If true (the default), special tokens (e.g. BOS) will be added to "
            "the prompt."
        ),
    )
    return_token_strs: bool | None = Field(
        default=False,
        description=(
            "If true, also return the token strings corresponding to the token ids."
        ),
    )

add_special_tokens class-attribute instance-attribute

add_special_tokens: bool = Field(
    default=True,
    description="If true (the default), special tokens (e.g. BOS) will be added to the prompt.",
)

model class-attribute instance-attribute

model: str | None = None

prompt instance-attribute

prompt: str

return_token_strs class-attribute instance-attribute

return_token_strs: bool | None = Field(
    default=False,
    description="If true, also return the token strings corresponding to the token ids.",
)
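
A hedged sketch of the completion-style request; the prompt text is illustrative and the keyword values mirror the defaults above.

from vllm.entrypoints.serve.tokenize.protocol import TokenizeCompletionRequest

req = TokenizeCompletionRequest(
    prompt="The quick brown fox jumps over the lazy dog",
    add_special_tokens=True,   # default here: special tokens such as BOS are added
    return_token_strs=False,
)
print(req.model_dump_json())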

TokenizeResponse

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/serve/tokenize/protocol.py
class TokenizeResponse(OpenAIBaseModel):
    count: int
    max_model_len: int
    tokens: list[int]
    token_strs: list[str] | None = None

count instance-attribute

count: int

max_model_len instance-attribute

max_model_len: int

token_strs class-attribute instance-attribute

token_strs: list[str] | None = None

tokens instance-attribute

tokens: list[int]
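
A hedged sketch of the shape these fields imply for a tokenize result; the token ids, strings, and max_model_len are illustrative, with count matching len(tokens).

from vllm.entrypoints.serve.tokenize.protocol import TokenizeResponse

resp = TokenizeResponse(
    count=4,
    max_model_len=8192,
    tokens=[450, 4996, 17354, 1701],                 # illustrative token ids
    token_strs=["The", " quick", " brown", " fox"],  # present when return_token_strs=True
)
assert resp.count == len(resp.tokens)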

TokenizerInfoResponse

Bases: OpenAIBaseModel

Response containing tokenizer configuration equivalent to tokenizer_config.json

Source code in vllm/entrypoints/serve/tokenize/protocol.py
class TokenizerInfoResponse(OpenAIBaseModel):
    """
    Response containing tokenizer configuration
    equivalent to tokenizer_config.json
    """

    model_config = ConfigDict(extra="allow")
    tokenizer_class: str

model_config class-attribute instance-attribute

model_config = ConfigDict(extra='allow')

tokenizer_class instance-attribute

tokenizer_class: str
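
Because model_config sets extra="allow", keys from tokenizer_config.json beyond tokenizer_class pass through unchanged; a hedged sketch with illustrative values.

from vllm.entrypoints.serve.tokenize.protocol import TokenizerInfoResponse

info = TokenizerInfoResponse(
    tokenizer_class="LlamaTokenizerFast",  # illustrative
    bos_token="<s>",                       # extra key, retained because extra="allow"
    model_max_length=4096,                 # extra key, retained as well
)
print(info.model_dump())  # includes the extra keys alongside tokenizer_class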