vllm.entrypoints.serve.tokenize.protocol ¶
TokenizeRequest module-attribute ¶
TokenizeRequest: TypeAlias = (
    TokenizeCompletionRequest | TokenizeChatRequest
)
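A tokenize request is therefore either completion-style (a raw prompt) or chat-style (a list of messages rendered through the chat template). As a hedged sketch of the difference, here are the two payload shapes posted to a running `vllm serve` instance's `/tokenize` endpoint; the server address and model name are illustrative assumptions, not part of this module:

```python
import requests

BASE = "http://localhost:8000"  # assumed default `vllm serve` address

# Completion-style: tokenize a raw prompt string directly.
completion_req = {
    "model": "meta-llama/Llama-3.1-8B-Instruct",  # illustrative model name
    "prompt": "Hello, world!",
}

# Chat-style: render the chat template over messages, then tokenize.
chat_req = {
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "messages": [{"role": "user", "content": "Hello, world!"}],
    "add_generation_prompt": True,
}

for payload in (completion_req, chat_req):
    print(requests.post(f"{BASE}/tokenize", json=payload).json())
```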
DetokenizeRequest ¶
Bases: OpenAIBaseModel
Source code in vllm/entrypoints/serve/tokenize/protocol.py
DetokenizeResponse ¶
Bases: OpenAIBaseModel
Source code in vllm/entrypoints/serve/tokenize/protocol.py
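A minimal sketch of a tokenize/detokenize round trip, assuming the request carries a `tokens` list and the response returns the decoded string under `prompt` (field names are assumptions to verify against your vLLM version):

```python
import requests

BASE = "http://localhost:8000"  # assumed server address
MODEL = "meta-llama/Llama-3.1-8B-Instruct"  # illustrative model name

# Tokenize a prompt, then feed the ids back through /detokenize.
tokens = requests.post(
    f"{BASE}/tokenize", json={"model": MODEL, "prompt": "Hello!"}
).json()["tokens"]

decoded = requests.post(
    f"{BASE}/detokenize", json={"model": MODEL, "tokens": tokens}
).json()["prompt"]
print(decoded)  # should round-trip back to (approximately) "Hello!"
```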
TokenizeChatRequest ¶
Bases: OpenAIBaseModel
Source code in vllm/entrypoints/serve/tokenize/protocol.py
add_generation_prompt class-attribute instance-attribute ¶
add_generation_prompt: bool = Field(
    default=True,
    description="If true, the generation prompt will be added to the chat template. This is a parameter used by the chat template defined in the model's tokenizer config.",
)
add_special_tokens class-attribute instance-attribute ¶
add_special_tokens: bool = Field(
    default=False,
    description="If true, special tokens (e.g. BOS) will be added to the prompt on top of what is added by the chat template. For most models, the chat template takes care of adding the special tokens, so this should be set to false (as is the default).",
)
chat_template class-attribute instance-attribute ¶
chat_template: str | None = Field(
    default=None,
    description="A Jinja template to use for this conversion. As of transformers v4.44, the default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one.",
)
chat_template_kwargs class-attribute instance-attribute ¶
chat_template_kwargs: dict[str, Any] | None = Field(
    default=None,
    description="Additional keyword args to pass to the template renderer. Will be accessible by the chat template.",
)
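For instance (an illustrative sketch; the kwarg name is whatever your model's chat template actually reads):

```python
payload = {
    "model": "meta-llama/Llama-3.1-8B-Instruct",  # illustrative model name
    "messages": [{"role": "user", "content": "hi"}],
    # Keys here become variables visible to the Jinja chat template,
    # e.g. a template that branches on {% if enable_thinking %}.
    "chat_template_kwargs": {"enable_thinking": False},
}
```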
continue_final_message class-attribute instance-attribute ¶
continue_final_message: bool = Field(
    default=False,
    description='If this is set, the chat will be formatted so that the final message in the chat is open-ended, without any EOS tokens. The model will continue this message rather than starting a new one. This allows you to "prefill" part of the model\'s response for it. Cannot be used at the same time as `add_generation_prompt`.',
)
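As a sketch of the prefill pattern this flag enables (the message content and model name are illustrative):

```python
# Tokenize a chat whose final assistant message is left open-ended, so the
# rendered prompt ends mid-message with no EOS token appended.
payload = {
    "model": "meta-llama/Llama-3.1-8B-Instruct",  # illustrative model name
    "messages": [
        {"role": "user", "content": "Write a haiku about tokenizers."},
        # Partial assistant turn the model would continue, not restart.
        {"role": "assistant", "content": "Subwords fall like leaves,"},
    ],
    "continue_final_message": True,
    "add_generation_prompt": False,  # mutually exclusive with the flag above
}
```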
mm_processor_kwargs class-attribute instance-attribute ¶
mm_processor_kwargs: dict[str, Any] | None = Field(
    default=None,
    description="Additional kwargs to pass to the HF processor.",
)
return_token_strs class-attribute instance-attribute ¶
return_token_strs: bool | None = Field(
    default=False,
    description="If true, also return the token strings corresponding to the token ids.",
)
tools class-attribute instance-attribute ¶
tools: list[ChatCompletionToolsParam] | None = Field(
    default=None,
    description="A list of tools the model may call.",
)
check_generation_prompt classmethod ¶
Validates that `continue_final_message` and `add_generation_prompt` are not both set on the same request.
Source code in vllm/entrypoints/serve/tokenize/protocol.py
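A minimal sketch of the validator's effect, assuming `messages` is the only required field and that the check surfaces as a pydantic `ValidationError` when the model is constructed:

```python
from pydantic import ValidationError

from vllm.entrypoints.serve.tokenize.protocol import TokenizeChatRequest

try:
    TokenizeChatRequest(
        messages=[{"role": "user", "content": "hi"}],
        add_generation_prompt=True,
        continue_final_message=True,  # conflicts with the flag above
    )
except ValidationError as exc:
    print(exc)  # expected: the two flags cannot be combined
```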
TokenizeCompletionRequest ¶
Bases: OpenAIBaseModel
Source code in vllm/entrypoints/serve/tokenize/protocol.py
TokenizeResponse ¶
Bases: OpenAIBaseModel
Source code in vllm/entrypoints/serve/tokenize/protocol.py
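A hedged sketch of the response shape: recent vLLM versions return the token count, the model's maximum context length, the token ids, and (when `return_token_strs` is set) the token strings. The field names below are assumptions to verify against your version:

```python
import requests

BASE = "http://localhost:8000"  # assumed server address
resp = requests.post(
    f"{BASE}/tokenize",
    json={
        "model": "meta-llama/Llama-3.1-8B-Instruct",  # illustrative
        "prompt": "Hello, world!",
        "return_token_strs": True,  # also request per-token strings
    },
).json()

# Assumed response fields -- verify against your vLLM version:
print(resp["count"])            # number of tokens produced
print(resp["max_model_len"])    # model context length
print(resp["tokens"])           # token ids
print(resp.get("token_strs"))   # token strings, when requested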
TokenizerInfoResponse ¶
Bases: OpenAIBaseModel
Response containing the tokenizer configuration, equivalent to tokenizer_config.json.
Source code in vllm/entrypoints/serve/tokenize/protocol.py
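A sketch of fetching this response from a running server. The `/tokenizer_info` route and the `tokenizer_class` key are assumptions based on this model's docstring, not confirmed by this page; check your server's route listing:

```python
import requests

info = requests.get("http://localhost:8000/tokenizer_info").json()
# Mirrors tokenizer_config.json, e.g. chat template and special-token settings.
print(info.get("tokenizer_class"))
print(list(info)[:10])  # peek at the available keys
```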