vllm.entrypoints.openai.chat_completion.protocol

_LONG_INFO module-attribute

_LONG_INFO = iinfo(long)

logger module-attribute

logger = init_logger(__name__)

ChatCompletionLogProb

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/openai/chat_completion/protocol.py
class ChatCompletionLogProb(OpenAIBaseModel):
    token: str
    logprob: float = -9999.0
    bytes: list[int] | None = None

bytes class-attribute instance-attribute

bytes: list[int] | None = None

logprob class-attribute instance-attribute

logprob: float = -9999.0

token instance-attribute

token: str

ChatCompletionLogProbs

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/openai/chat_completion/protocol.py
class ChatCompletionLogProbs(OpenAIBaseModel):
    content: list[ChatCompletionLogProbsContent] | None = None

content class-attribute instance-attribute

content: list[ChatCompletionLogProbsContent] | None = None

ChatCompletionLogProbsContent

Bases: ChatCompletionLogProb

Source code in vllm/entrypoints/openai/chat_completion/protocol.py
class ChatCompletionLogProbsContent(ChatCompletionLogProb):
    # Workaround: redefine fields name cache so that it's not
    # shared with the super class.
    field_names: ClassVar[set[str] | None] = None
    top_logprobs: list[ChatCompletionLogProb] = Field(default_factory=list)

field_names class-attribute

field_names: set[str] | None = None

top_logprobs class-attribute instance-attribute

top_logprobs: list[ChatCompletionLogProb] = Field(
    default_factory=list
)
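
The content entries mirror the OpenAI logprobs response shape: one ChatCompletionLogProbsContent per sampled token, each carrying its own top_logprobs alternatives. A minimal construction sketch, assuming OpenAIBaseModel is a pydantic v2 model (as the use of Field and model_validator on this page suggests) and using the module path shown in the page title:

# Sketch: building a logprobs payload by hand.
from vllm.entrypoints.openai.chat_completion.protocol import (
    ChatCompletionLogProb,
    ChatCompletionLogProbs,
    ChatCompletionLogProbsContent,
)

entry = ChatCompletionLogProbsContent(
    token="Hello",
    logprob=-0.12,
    bytes=[72, 101, 108, 108, 111],
    top_logprobs=[
        ChatCompletionLogProb(token="Hello", logprob=-0.12),
        ChatCompletionLogProb(token="Hi", logprob=-2.3),
    ],
)
logprobs = ChatCompletionLogProbs(content=[entry])
print(logprobs.model_dump_json())  # pydantic v2 serialization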

ChatCompletionNamedFunction

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/openai/chat_completion/protocol.py
class ChatCompletionNamedFunction(OpenAIBaseModel):
    name: str

name instance-attribute

name: str

ChatCompletionNamedToolChoiceParam

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/openai/chat_completion/protocol.py
class ChatCompletionNamedToolChoiceParam(OpenAIBaseModel):
    function: ChatCompletionNamedFunction
    type: Literal["function"] = "function"

function instance-attribute

function: ChatCompletionNamedFunction

type class-attribute instance-attribute

type: Literal['function'] = 'function'
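
This is the named-tool form of `tool_choice` in request payloads. A small validation sketch, assuming pydantic v2 and the module path shown in the page title:

# Sketch: validating a named tool_choice payload.
from vllm.entrypoints.openai.chat_completion.protocol import (
    ChatCompletionNamedToolChoiceParam,
)

payload = {"type": "function", "function": {"name": "get_weather"}}
tool_choice = ChatCompletionNamedToolChoiceParam.model_validate(payload)
assert tool_choice.function.name == "get_weather"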

ChatCompletionRequest

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/openai/chat_completion/protocol.py
class ChatCompletionRequest(OpenAIBaseModel):
    # Ordered by official OpenAI API documentation
    # https://platform.openai.com/docs/api-reference/chat/create
    messages: list[ChatCompletionMessageParam]
    model: str | None = None
    frequency_penalty: float | None = 0.0
    logit_bias: dict[str, float] | None = None
    logprobs: bool | None = False
    top_logprobs: int | None = 0
    max_tokens: int | None = Field(
        default=None,
        deprecated="max_tokens is deprecated in favor of "
        "the max_completion_tokens field",
    )
    max_completion_tokens: int | None = None
    n: int | None = 1
    presence_penalty: float | None = 0.0
    response_format: AnyResponseFormat | None = None
    seed: int | None = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max)
    stop: str | list[str] | None = []
    stream: bool | None = False
    stream_options: StreamOptions | None = None
    temperature: float | None = None
    top_p: float | None = None
    tools: list[ChatCompletionToolsParam] | None = None
    tool_choice: (
        Literal["none"]
        | Literal["auto"]
        | Literal["required"]
        | ChatCompletionNamedToolChoiceParam
        | None
    ) = "none"
    reasoning_effort: Literal["low", "medium", "high"] | None = None
    include_reasoning: bool = True
    parallel_tool_calls: bool | None = True

    # NOTE this will be ignored by vLLM
    user: str | None = None

    # --8<-- [start:chat-completion-sampling-params]
    use_beam_search: bool = False
    top_k: int | None = None
    min_p: float | None = None
    repetition_penalty: float | None = None
    length_penalty: float = 1.0
    stop_token_ids: list[int] | None = []
    include_stop_str_in_output: bool = False
    ignore_eos: bool = False
    min_tokens: int = 0
    skip_special_tokens: bool = True
    spaces_between_special_tokens: bool = True
    truncate_prompt_tokens: Annotated[int, Field(ge=-1, le=_LONG_INFO.max)] | None = (
        None
    )
    prompt_logprobs: int | None = None
    allowed_token_ids: list[int] | None = None
    bad_words: list[str] = Field(default_factory=list)
    # --8<-- [end:chat-completion-sampling-params]

    # --8<-- [start:chat-completion-extra-params]
    echo: bool = Field(
        default=False,
        description=(
            "If true, the new message will be prepended with the last message "
            "if they belong to the same role."
        ),
    )
    add_generation_prompt: bool = Field(
        default=True,
        description=(
            "If true, the generation prompt will be added to the chat template. "
            "This is a parameter used by chat template in tokenizer config of the "
            "model."
        ),
    )
    continue_final_message: bool = Field(
        default=False,
        description=(
            "If this is set, the chat will be formatted so that the final "
            "message in the chat is open-ended, without any EOS tokens. The "
            "model will continue this message rather than starting a new one. "
            'This allows you to "prefill" part of the model\'s response for it. '
            "Cannot be used at the same time as `add_generation_prompt`."
        ),
    )
    add_special_tokens: bool = Field(
        default=False,
        description=(
            "If true, special tokens (e.g. BOS) will be added to the prompt "
            "on top of what is added by the chat template. "
            "For most models, the chat template takes care of adding the "
            "special tokens so this should be set to false (as is the "
            "default)."
        ),
    )
    documents: list[dict[str, str]] | None = Field(
        default=None,
        description=(
            "A list of dicts representing documents that will be accessible to "
            "the model if it is performing RAG (retrieval-augmented generation)."
            " If the template does not support RAG, this argument will have no "
            "effect. We recommend that each document should be a dict containing "
            '"title" and "text" keys.'
        ),
    )
    chat_template: str | None = Field(
        default=None,
        description=(
            "A Jinja template to use for this conversion. "
            "As of transformers v4.44, default chat template is no longer "
            "allowed, so you must provide a chat template if the tokenizer "
            "does not define one."
        ),
    )
    chat_template_kwargs: dict[str, Any] | None = Field(
        default=None,
        description=(
            "Additional keyword args to pass to the template renderer. "
            "Will be accessible by the chat template."
        ),
    )
    mm_processor_kwargs: dict[str, Any] | None = Field(
        default=None,
        description=("Additional kwargs to pass to the HF processor."),
    )
    structured_outputs: StructuredOutputsParams | None = Field(
        default=None,
        description="Additional kwargs for structured outputs",
    )
    priority: int = Field(
        default=0,
        description=(
            "The priority of the request (lower means earlier handling; "
            "default: 0). Any priority other than 0 will raise an error "
            "if the served model does not use priority scheduling."
        ),
    )
    request_id: str = Field(
        default_factory=random_uuid,
        description=(
            "The request_id related to this request. If the caller does "
            "not set it, a random_uuid will be generated. This id is used "
            "through out the inference process and return in response."
        ),
    )
    logits_processors: LogitsProcessors | None = Field(
        default=None,
        description=(
            "A list of either qualified names of logits processors, or "
            "constructor objects, to apply when sampling. A constructor is "
            "a JSON object with a required 'qualname' field specifying the "
            "qualified name of the processor class/factory, and optional "
            "'args' and 'kwargs' fields containing positional and keyword "
            "arguments. For example: {'qualname': "
            "'my_module.MyLogitsProcessor', 'args': [1, 2], 'kwargs': "
            "{'param': 'value'}}."
        ),
    )
    return_tokens_as_token_ids: bool | None = Field(
        default=None,
        description=(
            "If specified with 'logprobs', tokens are represented "
            " as strings of the form 'token_id:{token_id}' so that tokens "
            "that are not JSON-encodable can be identified."
        ),
    )
    return_token_ids: bool | None = Field(
        default=None,
        description=(
            "If specified, the result will include token IDs alongside the "
            "generated text. In streaming mode, prompt_token_ids is included "
            "only in the first chunk, and token_ids contains the delta tokens "
            "for each chunk. This is useful for debugging or when you "
            "need to map generated text back to input tokens."
        ),
    )
    cache_salt: str | None = Field(
        default=None,
        description=(
            "If specified, the prefix cache will be salted with the provided "
            "string to prevent an attacker to guess prompts in multi-user "
            "environments. The salt should be random, protected from "
            "access by 3rd parties, and long enough to be "
            "unpredictable (e.g., 43 characters base64-encoded, corresponding "
            "to 256 bit)."
        ),
    )
    kv_transfer_params: dict[str, Any] | None = Field(
        default=None,
        description="KVTransfer parameters used for disaggregated serving.",
    )

    vllm_xargs: dict[str, str | int | float | list[str | int | float]] | None = Field(
        default=None,
        description=(
            "Additional request parameters with (list of) string or "
            "numeric values, used by custom extensions."
        ),
    )

    # --8<-- [end:chat-completion-extra-params]

    # Default sampling parameters for chat completion requests
    _DEFAULT_SAMPLING_PARAMS: dict = {
        "repetition_penalty": 1.0,
        "temperature": 1.0,
        "top_p": 1.0,
        "top_k": 0,
        "min_p": 0.0,
    }

    def to_beam_search_params(
        self, max_tokens: int, default_sampling_params: dict
    ) -> BeamSearchParams:
        n = self.n if self.n is not None else 1
        if (temperature := self.temperature) is None:
            temperature = default_sampling_params.get(
                "temperature", self._DEFAULT_SAMPLING_PARAMS["temperature"]
            )

        return BeamSearchParams(
            beam_width=n,
            max_tokens=max_tokens,
            ignore_eos=self.ignore_eos,
            temperature=temperature,
            length_penalty=self.length_penalty,
            include_stop_str_in_output=self.include_stop_str_in_output,
        )

    def to_sampling_params(
        self,
        max_tokens: int,
        logits_processor_pattern: str | None,
        default_sampling_params: dict,
    ) -> SamplingParams:
        # Default parameters
        if (repetition_penalty := self.repetition_penalty) is None:
            repetition_penalty = default_sampling_params.get(
                "repetition_penalty",
                self._DEFAULT_SAMPLING_PARAMS["repetition_penalty"],
            )
        if (temperature := self.temperature) is None:
            temperature = default_sampling_params.get(
                "temperature", self._DEFAULT_SAMPLING_PARAMS["temperature"]
            )
        if (top_p := self.top_p) is None:
            top_p = default_sampling_params.get(
                "top_p", self._DEFAULT_SAMPLING_PARAMS["top_p"]
            )
        if (top_k := self.top_k) is None:
            top_k = default_sampling_params.get(
                "top_k", self._DEFAULT_SAMPLING_PARAMS["top_k"]
            )
        if (min_p := self.min_p) is None:
            min_p = default_sampling_params.get(
                "min_p", self._DEFAULT_SAMPLING_PARAMS["min_p"]
            )

        prompt_logprobs = self.prompt_logprobs
        if prompt_logprobs is None and self.echo:
            prompt_logprobs = self.top_logprobs

        response_format = self.response_format
        if response_format is not None:
            # If structured outputs wasn't already enabled,
            # we must enable it for these features to work
            if self.structured_outputs is None:
                self.structured_outputs = StructuredOutputsParams()

            # Set structured output params for response format
            if response_format.type == "json_object":
                self.structured_outputs.json_object = True
            elif response_format.type == "json_schema":
                json_schema = response_format.json_schema
                assert json_schema is not None
                self.structured_outputs.json = json_schema.json_schema
            elif response_format.type == "structural_tag":
                structural_tag = response_format
                assert structural_tag is not None and isinstance(
                    structural_tag,
                    (
                        LegacyStructuralTagResponseFormat,
                        StructuralTagResponseFormat,
                    ),
                )
                s_tag_obj = structural_tag.model_dump(by_alias=True)
                self.structured_outputs.structural_tag = json.dumps(s_tag_obj)

        extra_args: dict[str, Any] = self.vllm_xargs if self.vllm_xargs else {}
        if self.kv_transfer_params:
            # Pass in kv_transfer_params via extra_args
            extra_args["kv_transfer_params"] = self.kv_transfer_params
        return SamplingParams.from_optional(
            n=self.n,
            presence_penalty=self.presence_penalty,
            frequency_penalty=self.frequency_penalty,
            repetition_penalty=repetition_penalty,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            min_p=min_p,
            seed=self.seed,
            stop=self.stop,
            stop_token_ids=self.stop_token_ids,
            logprobs=self.top_logprobs if self.logprobs else None,
            prompt_logprobs=prompt_logprobs,
            ignore_eos=self.ignore_eos,
            max_tokens=max_tokens,
            min_tokens=self.min_tokens,
            skip_special_tokens=self.skip_special_tokens,
            spaces_between_special_tokens=self.spaces_between_special_tokens,
            logits_processors=get_logits_processors(
                self.logits_processors, logits_processor_pattern
            ),
            include_stop_str_in_output=self.include_stop_str_in_output,
            truncate_prompt_tokens=self.truncate_prompt_tokens,
            output_kind=RequestOutputKind.DELTA
            if self.stream
            else RequestOutputKind.FINAL_ONLY,
            structured_outputs=self.structured_outputs,
            logit_bias=self.logit_bias,
            bad_words=self.bad_words,
            allowed_token_ids=self.allowed_token_ids,
            extra_args=extra_args or None,
            skip_clone=True,  # Created fresh per request, safe to skip clone
        )

    @model_validator(mode="before")
    @classmethod
    def validate_stream_options(cls, data):
        if data.get("stream_options") and not data.get("stream"):
            raise VLLMValidationError(
                "Stream options can only be defined when `stream=True`.",
                parameter="stream_options",
            )

        return data

    @model_validator(mode="before")
    @classmethod
    def check_logprobs(cls, data):
        if (prompt_logprobs := data.get("prompt_logprobs")) is not None:
            if data.get("stream") and (prompt_logprobs > 0 or prompt_logprobs == -1):
                raise VLLMValidationError(
                    "`prompt_logprobs` are not available when `stream=True`.",
                    parameter="prompt_logprobs",
                )

            if prompt_logprobs < 0 and prompt_logprobs != -1:
                raise VLLMValidationError(
                    "`prompt_logprobs` must be a positive value or -1.",
                    parameter="prompt_logprobs",
                    value=prompt_logprobs,
                )
        if (top_logprobs := data.get("top_logprobs")) is not None:
            if top_logprobs < 0 and top_logprobs != -1:
                raise VLLMValidationError(
                    "`top_logprobs` must be a positive value or -1.",
                    parameter="top_logprobs",
                    value=top_logprobs,
                )

            if (top_logprobs == -1 or top_logprobs > 0) and not data.get("logprobs"):
                raise VLLMValidationError(
                    "when using `top_logprobs`, `logprobs` must be set to true.",
                    parameter="top_logprobs",
                )

        return data

    @model_validator(mode="before")
    @classmethod
    def check_structured_outputs_count(cls, data):
        if isinstance(data, ValueError):
            raise data

        if data.get("structured_outputs", None) is None:
            return data

        structured_outputs_kwargs = data["structured_outputs"]
        count = sum(
            structured_outputs_kwargs.get(k) is not None
            for k in ("json", "regex", "choice")
        )
        # you can only use one kind of constraints for structured outputs
        if count > 1:
            raise ValueError(
                "You can only use one kind of constraints for structured "
                "outputs ('json', 'regex' or 'choice')."
            )
        # you can only either use structured outputs or tools, not both
        if count > 1 and data.get("tool_choice", "none") not in (
            "none",
            "auto",
            "required",
        ):
            raise ValueError(
                "You can only either use constraints for structured outputs "
                "or tools, not both."
            )
        return data

    @model_validator(mode="before")
    @classmethod
    def check_tool_usage(cls, data):
        # if "tool_choice" is not specified but tools are provided,
        # default to "auto" tool_choice
        if "tool_choice" not in data and data.get("tools"):
            data["tool_choice"] = "auto"

        # if "tool_choice" is "none" -- no validation is needed for tools
        if "tool_choice" in data and data["tool_choice"] == "none":
            return data

        # if "tool_choice" is specified -- validation
        if "tool_choice" in data and data["tool_choice"] is not None:
            # ensure that if "tool choice" is specified, tools are present
            if "tools" not in data or data["tools"] is None:
                raise ValueError("When using `tool_choice`, `tools` must be set.")

            # make sure that tool choice is either a named tool
            # OR that it's set to "auto" or "required"
            if data["tool_choice"] not in ["auto", "required"] and not isinstance(
                data["tool_choice"], dict
            ):
                raise ValueError(
                    f"Invalid value for `tool_choice`: {data['tool_choice']}! "
                    'Only named tools, "none", "auto" or "required" '
                    "are supported."
                )

            # if tool_choice is "required" but the "tools" list is empty,
            # override the data to behave like "none" to align with
            # OpenAI’s behavior.
            if (
                data["tool_choice"] == "required"
                and isinstance(data["tools"], list)
                and len(data["tools"]) == 0
            ):
                data["tool_choice"] = "none"
                del data["tools"]
                return data

            # ensure that if "tool_choice" is specified as an object,
            # it matches a valid tool
            correct_usage_message = (
                'Correct usage: `{"type": "function",'
                ' "function": {"name": "my_function"}}`'
            )
            if isinstance(data["tool_choice"], dict):
                valid_tool = False
                function = data["tool_choice"].get("function")
                if not isinstance(function, dict):
                    raise ValueError(
                        f"Invalid value for `function`: `{function}` in "
                        f"`tool_choice`! {correct_usage_message}"
                    )
                if "name" not in function:
                    raise ValueError(
                        f"Expected field `name` in `function` in "
                        f"`tool_choice`! {correct_usage_message}"
                    )
                function_name = function["name"]
                if not isinstance(function_name, str) or len(function_name) == 0:
                    raise ValueError(
                        f"Invalid `name` in `function`: `{function_name}`"
                        f" in `tool_choice`! {correct_usage_message}"
                    )
                for tool in data["tools"]:
                    if tool["function"]["name"] == function_name:
                        valid_tool = True
                        break
                if not valid_tool:
                    raise ValueError(
                        "The tool specified in `tool_choice` does not match any"
                        " of the specified `tools`"
                    )
        return data

    @model_validator(mode="before")
    @classmethod
    def check_generation_prompt(cls, data):
        if data.get("continue_final_message") and data.get("add_generation_prompt"):
            raise ValueError(
                "Cannot set both `continue_final_message` and "
                "`add_generation_prompt` to True."
            )
        return data

    @model_validator(mode="before")
    @classmethod
    def check_cache_salt_support(cls, data):
        if data.get("cache_salt") is not None and (
            not isinstance(data["cache_salt"], str) or not data["cache_salt"]
        ):
            raise ValueError(
                "Parameter 'cache_salt' must be a non-empty string if provided."
            )
        return data
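
A minimal usage sketch: validating a plain request body against this model, which also runs the mode="before" validators defined above (tool usage, logprobs, stream options, and so on). The model name is illustrative; the import path follows the page title.

# Sketch: building a ChatCompletionRequest from a plain request body.
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest

body = {
    "model": "my-model",  # illustrative model name
    "messages": [{"role": "user", "content": "Hello!"}],
    "temperature": 0.7,
    "max_completion_tokens": 64,
}
request = ChatCompletionRequest.model_validate(body)
print(request.tool_choice)  # "none" (default, since no tools were given)
print(request.request_id)   # auto-generated via random_uuid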

_DEFAULT_SAMPLING_PARAMS class-attribute instance-attribute

_DEFAULT_SAMPLING_PARAMS: dict = {
    "repetition_penalty": 1.0,
    "temperature": 1.0,
    "top_p": 1.0,
    "top_k": 0,
    "min_p": 0.0,
}

add_generation_prompt class-attribute instance-attribute

add_generation_prompt: bool = Field(
    default=True,
    description="If true, the generation prompt will be added to the chat template. This is a parameter used by chat template in tokenizer config of the model.",
)

add_special_tokens class-attribute instance-attribute

add_special_tokens: bool = Field(
    default=False,
    description="If true, special tokens (e.g. BOS) will be added to the prompt on top of what is added by the chat template. For most models, the chat template takes care of adding the special tokens so this should be set to false (as is the default).",
)

allowed_token_ids class-attribute instance-attribute

allowed_token_ids: list[int] | None = None

bad_words class-attribute instance-attribute

bad_words: list[str] = Field(default_factory=list)

cache_salt class-attribute instance-attribute

cache_salt: str | None = Field(
    default=None,
    description="If specified, the prefix cache will be salted with the provided string to prevent an attacker to guess prompts in multi-user environments. The salt should be random, protected from access by 3rd parties, and long enough to be unpredictable (e.g., 43 characters base64-encoded, corresponding to 256 bit).",
)
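
A sketch of producing a salt with the properties the description asks for; secrets.token_urlsafe(32) returns a 43-character URL-safe base64 string, which corresponds to 256 bits of randomness:

import secrets

# 32 random bytes -> 43 URL-safe base64 characters (~256 bits of entropy).
cache_salt = secrets.token_urlsafe(32)

body = {
    "model": "my-model",  # illustrative
    "messages": [{"role": "user", "content": "Hello!"}],
    "cache_salt": cache_salt,
}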

chat_template class-attribute instance-attribute

chat_template: str | None = Field(
    default=None,
    description="A Jinja template to use for this conversion. As of transformers v4.44, default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one.",
)

chat_template_kwargs class-attribute instance-attribute

chat_template_kwargs: dict[str, Any] | None = Field(
    default=None,
    description="Additional keyword args to pass to the template renderer. Will be accessible by the chat template.",
)

continue_final_message class-attribute instance-attribute

continue_final_message: bool = Field(
    default=False,
    description='If this is set, the chat will be formatted so that the final message in the chat is open-ended, without any EOS tokens. The model will continue this message rather than starting a new one. This allows you to "prefill" part of the model\'s response for it. Cannot be used at the same time as `add_generation_prompt`.',
)

documents class-attribute instance-attribute

documents: list[dict[str, str]] | None = Field(
    default=None,
    description='A list of dicts representing documents that will be accessible to the model if it is performing RAG (retrieval-augmented generation). If the template does not support RAG, this argument will have no effect. We recommend that each document should be a dict containing "title" and "text" keys.',
)

echo class-attribute instance-attribute

echo: bool = Field(
    default=False,
    description="If true, the new message will be prepended with the last message if they belong to the same role.",
)

frequency_penalty class-attribute instance-attribute

frequency_penalty: float | None = 0.0

ignore_eos class-attribute instance-attribute

ignore_eos: bool = False

include_reasoning class-attribute instance-attribute

include_reasoning: bool = True

include_stop_str_in_output class-attribute instance-attribute

include_stop_str_in_output: bool = False

kv_transfer_params class-attribute instance-attribute

kv_transfer_params: dict[str, Any] | None = Field(
    default=None,
    description="KVTransfer parameters used for disaggregated serving.",
)

length_penalty class-attribute instance-attribute

length_penalty: float = 1.0

logit_bias class-attribute instance-attribute

logit_bias: dict[str, float] | None = None

logits_processors class-attribute instance-attribute

logits_processors: LogitsProcessors | None = Field(
    default=None,
    description="A list of either qualified names of logits processors, or constructor objects, to apply when sampling. A constructor is a JSON object with a required 'qualname' field specifying the qualified name of the processor class/factory, and optional 'args' and 'kwargs' fields containing positional and keyword arguments. For example: {'qualname': 'my_module.MyLogitsProcessor', 'args': [1, 2], 'kwargs': {'param': 'value'}}.",
)
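
A sketch of the constructor-object form described above. The qualified name is hypothetical; a real request would need a processor that is actually installed on the server and permitted by `logits_processor_pattern`:

# Hypothetical logits processor constructor object, per the description above.
body = {
    "model": "my-model",  # illustrative
    "messages": [{"role": "user", "content": "Hello!"}],
    "logits_processors": [
        {
            "qualname": "my_module.MyLogitsProcessor",  # hypothetical qualname
            "args": [1, 2],
            "kwargs": {"param": "value"},
        }
    ],
}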

logprobs class-attribute instance-attribute

logprobs: bool | None = False

max_completion_tokens class-attribute instance-attribute

max_completion_tokens: int | None = None

max_tokens class-attribute instance-attribute

max_tokens: int | None = Field(
    default=None,
    deprecated="max_tokens is deprecated in favor of the max_completion_tokens field",
)

messages instance-attribute

messages: list[ChatCompletionMessageParam]

min_p class-attribute instance-attribute

min_p: float | None = None

min_tokens class-attribute instance-attribute

min_tokens: int = 0

mm_processor_kwargs class-attribute instance-attribute

mm_processor_kwargs: dict[str, Any] | None = Field(
    default=None,
    description="Additional kwargs to pass to the HF processor.",
)

model class-attribute instance-attribute

model: str | None = None

n class-attribute instance-attribute

n: int | None = 1

parallel_tool_calls class-attribute instance-attribute

parallel_tool_calls: bool | None = True

presence_penalty class-attribute instance-attribute

presence_penalty: float | None = 0.0

priority class-attribute instance-attribute

priority: int = Field(
    default=0,
    description="The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.",
)

prompt_logprobs class-attribute instance-attribute

prompt_logprobs: int | None = None

reasoning_effort class-attribute instance-attribute

reasoning_effort: (
    Literal["low", "medium", "high"] | None
) = None

repetition_penalty class-attribute instance-attribute

repetition_penalty: float | None = None

request_id class-attribute instance-attribute

request_id: str = Field(
    default_factory=random_uuid,
    description="The request_id related to this request. If the caller does not set it, a random_uuid will be generated. This id is used through out the inference process and return in response.",
)

response_format class-attribute instance-attribute

response_format: AnyResponseFormat | None = None

return_token_ids class-attribute instance-attribute

return_token_ids: bool | None = Field(
    default=None,
    description="If specified, the result will include token IDs alongside the generated text. In streaming mode, prompt_token_ids is included only in the first chunk, and token_ids contains the delta tokens for each chunk. This is useful for debugging or when you need to map generated text back to input tokens.",
)

return_tokens_as_token_ids class-attribute instance-attribute

return_tokens_as_token_ids: bool | None = Field(
    default=None,
    description="If specified with 'logprobs', tokens are represented  as strings of the form 'token_id:{token_id}' so that tokens that are not JSON-encodable can be identified.",
)

seed class-attribute instance-attribute

seed: int | None = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max)

skip_special_tokens class-attribute instance-attribute

skip_special_tokens: bool = True

spaces_between_special_tokens class-attribute instance-attribute

spaces_between_special_tokens: bool = True

stop class-attribute instance-attribute

stop: str | list[str] | None = []

stop_token_ids class-attribute instance-attribute

stop_token_ids: list[int] | None = []

stream class-attribute instance-attribute

stream: bool | None = False

stream_options class-attribute instance-attribute

stream_options: StreamOptions | None = None

structured_outputs class-attribute instance-attribute

structured_outputs: StructuredOutputsParams | None = Field(
    default=None,
    description="Additional kwargs for structured outputs",
)

temperature class-attribute instance-attribute

temperature: float | None = None

tool_choice class-attribute instance-attribute

tool_choice: (
    Literal["none"]
    | Literal["auto"]
    | Literal["required"]
    | ChatCompletionNamedToolChoiceParam
    | None
) = "none"

tools class-attribute instance-attribute

tools: list[ChatCompletionToolsParam] | None = None

top_k class-attribute instance-attribute

top_k: int | None = None

top_logprobs class-attribute instance-attribute

top_logprobs: int | None = 0

top_p class-attribute instance-attribute

top_p: float | None = None

truncate_prompt_tokens class-attribute instance-attribute

truncate_prompt_tokens: (
    Annotated[int, Field(ge=-1, le=_LONG_INFO.max)] | None
) = None

use_beam_search class-attribute instance-attribute

use_beam_search: bool = False

user class-attribute instance-attribute

user: str | None = None

vllm_xargs class-attribute instance-attribute

vllm_xargs: (
    dict[str, str | int | float | list[str | int | float]]
    | None
) = Field(
    default=None,
    description="Additional request parameters with (list of) string or numeric values, used by custom extensions.",
)
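
A sketch of passing extension parameters; per `to_sampling_params` above, these values are forwarded as SamplingParams extra_args. The keys here are hypothetical:

# vllm_xargs accepts string/numeric values or lists of them.
body = {
    "model": "my-model",  # illustrative
    "messages": [{"role": "user", "content": "Hello!"}],
    "vllm_xargs": {
        "my_extension_flag": 1,             # hypothetical extension keys
        "my_extension_tags": ["a", "b"],
    },
}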

check_cache_salt_support classmethod

check_cache_salt_support(data)
Source code in vllm/entrypoints/openai/chat_completion/protocol.py
@model_validator(mode="before")
@classmethod
def check_cache_salt_support(cls, data):
    if data.get("cache_salt") is not None and (
        not isinstance(data["cache_salt"], str) or not data["cache_salt"]
    ):
        raise ValueError(
            "Parameter 'cache_salt' must be a non-empty string if provided."
        )
    return data

check_generation_prompt classmethod

check_generation_prompt(data)
Source code in vllm/entrypoints/openai/chat_completion/protocol.py
@model_validator(mode="before")
@classmethod
def check_generation_prompt(cls, data):
    if data.get("continue_final_message") and data.get("add_generation_prompt"):
        raise ValueError(
            "Cannot set both `continue_final_message` and "
            "`add_generation_prompt` to True."
        )
    return data

check_logprobs classmethod

check_logprobs(data)
Source code in vllm/entrypoints/openai/chat_completion/protocol.py
@model_validator(mode="before")
@classmethod
def check_logprobs(cls, data):
    if (prompt_logprobs := data.get("prompt_logprobs")) is not None:
        if data.get("stream") and (prompt_logprobs > 0 or prompt_logprobs == -1):
            raise VLLMValidationError(
                "`prompt_logprobs` are not available when `stream=True`.",
                parameter="prompt_logprobs",
            )

        if prompt_logprobs < 0 and prompt_logprobs != -1:
            raise VLLMValidationError(
                "`prompt_logprobs` must be a positive value or -1.",
                parameter="prompt_logprobs",
                value=prompt_logprobs,
            )
    if (top_logprobs := data.get("top_logprobs")) is not None:
        if top_logprobs < 0 and top_logprobs != -1:
            raise VLLMValidationError(
                "`top_logprobs` must be a positive value or -1.",
                parameter="top_logprobs",
                value=top_logprobs,
            )

        if (top_logprobs == -1 or top_logprobs > 0) and not data.get("logprobs"):
            raise VLLMValidationError(
                "when using `top_logprobs`, `logprobs` must be set to true.",
                parameter="top_logprobs",
            )

    return data
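
In short: `top_logprobs` requires `logprobs=true`, and `prompt_logprobs` cannot be combined with streaming. A sketch of one accepted and one rejected combination, assuming pydantic v2 validation:

from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest

ok = ChatCompletionRequest.model_validate({
    "messages": [{"role": "user", "content": "Hi"}],
    "logprobs": True,
    "top_logprobs": 5,
})
assert ok.top_logprobs == 5

try:
    ChatCompletionRequest.model_validate({
        "messages": [{"role": "user", "content": "Hi"}],
        "top_logprobs": 5,  # logprobs not set -> rejected by check_logprobs
    })
except Exception as exc:  # VLLMValidationError, possibly wrapped by pydantic
    print(exc)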

check_structured_outputs_count classmethod

check_structured_outputs_count(data)
Source code in vllm/entrypoints/openai/chat_completion/protocol.py
@model_validator(mode="before")
@classmethod
def check_structured_outputs_count(cls, data):
    if isinstance(data, ValueError):
        raise data

    if data.get("structured_outputs", None) is None:
        return data

    structured_outputs_kwargs = data["structured_outputs"]
    count = sum(
        structured_outputs_kwargs.get(k) is not None
        for k in ("json", "regex", "choice")
    )
    # you can only use one kind of constraints for structured outputs
    if count > 1:
        raise ValueError(
            "You can only use one kind of constraints for structured "
            "outputs ('json', 'regex' or 'choice')."
        )
    # you can only either use structured outputs or tools, not both
    if count > 1 and data.get("tool_choice", "none") not in (
        "none",
        "auto",
        "required",
    ):
        raise ValueError(
            "You can only either use constraints for structured outputs "
            "or tools, not both."
        )
    return data

check_tool_usage classmethod

check_tool_usage(data)
Source code in vllm/entrypoints/openai/chat_completion/protocol.py
@model_validator(mode="before")
@classmethod
def check_tool_usage(cls, data):
    # if "tool_choice" is not specified but tools are provided,
    # default to "auto" tool_choice
    if "tool_choice" not in data and data.get("tools"):
        data["tool_choice"] = "auto"

    # if "tool_choice" is "none" -- no validation is needed for tools
    if "tool_choice" in data and data["tool_choice"] == "none":
        return data

    # if "tool_choice" is specified -- validation
    if "tool_choice" in data and data["tool_choice"] is not None:
        # ensure that if "tool choice" is specified, tools are present
        if "tools" not in data or data["tools"] is None:
            raise ValueError("When using `tool_choice`, `tools` must be set.")

        # make sure that tool choice is either a named tool
        # OR that it's set to "auto" or "required"
        if data["tool_choice"] not in ["auto", "required"] and not isinstance(
            data["tool_choice"], dict
        ):
            raise ValueError(
                f"Invalid value for `tool_choice`: {data['tool_choice']}! "
                'Only named tools, "none", "auto" or "required" '
                "are supported."
            )

        # if tool_choice is "required" but the "tools" list is empty,
        # override the data to behave like "none" to align with
        # OpenAI’s behavior.
        if (
            data["tool_choice"] == "required"
            and isinstance(data["tools"], list)
            and len(data["tools"]) == 0
        ):
            data["tool_choice"] = "none"
            del data["tools"]
            return data

        # ensure that if "tool_choice" is specified as an object,
        # it matches a valid tool
        correct_usage_message = (
            'Correct usage: `{"type": "function",'
            ' "function": {"name": "my_function"}}`'
        )
        if isinstance(data["tool_choice"], dict):
            valid_tool = False
            function = data["tool_choice"].get("function")
            if not isinstance(function, dict):
                raise ValueError(
                    f"Invalid value for `function`: `{function}` in "
                    f"`tool_choice`! {correct_usage_message}"
                )
            if "name" not in function:
                raise ValueError(
                    f"Expected field `name` in `function` in "
                    f"`tool_choice`! {correct_usage_message}"
                )
            function_name = function["name"]
            if not isinstance(function_name, str) or len(function_name) == 0:
                raise ValueError(
                    f"Invalid `name` in `function`: `{function_name}`"
                    f" in `tool_choice`! {correct_usage_message}"
                )
            for tool in data["tools"]:
                if tool["function"]["name"] == function_name:
                    valid_tool = True
                    break
            if not valid_tool:
                raise ValueError(
                    "The tool specified in `tool_choice` does not match any"
                    " of the specified `tools`"
                )
    return data
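
A sketch of the behavior this validator enforces: providing `tools` without an explicit `tool_choice` defaults it to "auto", and a named `tool_choice` must refer to one of the supplied tools. The tool definition is illustrative:

from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest

# Illustrative tool definition (only the function name matters for this check).
weather_tool = {"type": "function", "function": {"name": "get_weather"}}

# tools given without tool_choice -> tool_choice defaults to "auto".
req = ChatCompletionRequest.model_validate({
    "messages": [{"role": "user", "content": "Weather in Paris?"}],
    "tools": [weather_tool],
})
assert req.tool_choice == "auto"

# A named tool_choice must match one of the entries in `tools`.
req = ChatCompletionRequest.model_validate({
    "messages": [{"role": "user", "content": "Weather in Paris?"}],
    "tools": [weather_tool],
    "tool_choice": {"type": "function", "function": {"name": "get_weather"}},
})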

to_beam_search_params

to_beam_search_params(
    max_tokens: int, default_sampling_params: dict
) -> BeamSearchParams
Source code in vllm/entrypoints/openai/chat_completion/protocol.py
def to_beam_search_params(
    self, max_tokens: int, default_sampling_params: dict
) -> BeamSearchParams:
    n = self.n if self.n is not None else 1
    if (temperature := self.temperature) is None:
        temperature = default_sampling_params.get(
            "temperature", self._DEFAULT_SAMPLING_PARAMS["temperature"]
        )

    return BeamSearchParams(
        beam_width=n,
        max_tokens=max_tokens,
        ignore_eos=self.ignore_eos,
        temperature=temperature,
        length_penalty=self.length_penalty,
        include_stop_str_in_output=self.include_stop_str_in_output,
    )
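
A conversion sketch for the beam search path: `n` becomes the beam width and an unset temperature falls back through `default_sampling_params` to `_DEFAULT_SAMPLING_PARAMS`. Values here are illustrative:

from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest

req = ChatCompletionRequest.model_validate({
    "messages": [{"role": "user", "content": "Hello!"}],
    "n": 4,
    "use_beam_search": True,
})
params = req.to_beam_search_params(max_tokens=64, default_sampling_params={})
# beam_width=4; temperature falls back to _DEFAULT_SAMPLING_PARAMS["temperature"]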

to_sampling_params

to_sampling_params(
    max_tokens: int,
    logits_processor_pattern: str | None,
    default_sampling_params: dict,
) -> SamplingParams
Source code in vllm/entrypoints/openai/chat_completion/protocol.py
def to_sampling_params(
    self,
    max_tokens: int,
    logits_processor_pattern: str | None,
    default_sampling_params: dict,
) -> SamplingParams:
    # Default parameters
    if (repetition_penalty := self.repetition_penalty) is None:
        repetition_penalty = default_sampling_params.get(
            "repetition_penalty",
            self._DEFAULT_SAMPLING_PARAMS["repetition_penalty"],
        )
    if (temperature := self.temperature) is None:
        temperature = default_sampling_params.get(
            "temperature", self._DEFAULT_SAMPLING_PARAMS["temperature"]
        )
    if (top_p := self.top_p) is None:
        top_p = default_sampling_params.get(
            "top_p", self._DEFAULT_SAMPLING_PARAMS["top_p"]
        )
    if (top_k := self.top_k) is None:
        top_k = default_sampling_params.get(
            "top_k", self._DEFAULT_SAMPLING_PARAMS["top_k"]
        )
    if (min_p := self.min_p) is None:
        min_p = default_sampling_params.get(
            "min_p", self._DEFAULT_SAMPLING_PARAMS["min_p"]
        )

    prompt_logprobs = self.prompt_logprobs
    if prompt_logprobs is None and self.echo:
        prompt_logprobs = self.top_logprobs

    response_format = self.response_format
    if response_format is not None:
        # If structured outputs wasn't already enabled,
        # we must enable it for these features to work
        if self.structured_outputs is None:
            self.structured_outputs = StructuredOutputsParams()

        # Set structured output params for response format
        if response_format.type == "json_object":
            self.structured_outputs.json_object = True
        elif response_format.type == "json_schema":
            json_schema = response_format.json_schema
            assert json_schema is not None
            self.structured_outputs.json = json_schema.json_schema
        elif response_format.type == "structural_tag":
            structural_tag = response_format
            assert structural_tag is not None and isinstance(
                structural_tag,
                (
                    LegacyStructuralTagResponseFormat,
                    StructuralTagResponseFormat,
                ),
            )
            s_tag_obj = structural_tag.model_dump(by_alias=True)
            self.structured_outputs.structural_tag = json.dumps(s_tag_obj)

    extra_args: dict[str, Any] = self.vllm_xargs if self.vllm_xargs else {}
    if self.kv_transfer_params:
        # Pass in kv_transfer_params via extra_args
        extra_args["kv_transfer_params"] = self.kv_transfer_params
    return SamplingParams.from_optional(
        n=self.n,
        presence_penalty=self.presence_penalty,
        frequency_penalty=self.frequency_penalty,
        repetition_penalty=repetition_penalty,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        min_p=min_p,
        seed=self.seed,
        stop=self.stop,
        stop_token_ids=self.stop_token_ids,
        logprobs=self.top_logprobs if self.logprobs else None,
        prompt_logprobs=prompt_logprobs,
        ignore_eos=self.ignore_eos,
        max_tokens=max_tokens,
        min_tokens=self.min_tokens,
        skip_special_tokens=self.skip_special_tokens,
        spaces_between_special_tokens=self.spaces_between_special_tokens,
        logits_processors=get_logits_processors(
            self.logits_processors, logits_processor_pattern
        ),
        include_stop_str_in_output=self.include_stop_str_in_output,
        truncate_prompt_tokens=self.truncate_prompt_tokens,
        output_kind=RequestOutputKind.DELTA
        if self.stream
        else RequestOutputKind.FINAL_ONLY,
        structured_outputs=self.structured_outputs,
        logit_bias=self.logit_bias,
        bad_words=self.bad_words,
        allowed_token_ids=self.allowed_token_ids,
        extra_args=extra_args or None,
        skip_clone=True,  # Created fresh per request, safe to skip clone
    )
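
A conversion sketch: fields left unset on the request fall back to the server-supplied `default_sampling_params` and then to `_DEFAULT_SAMPLING_PARAMS`. The defaults dict below is illustrative:

from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest

req = ChatCompletionRequest.model_validate({
    "messages": [{"role": "user", "content": "Hello!"}],
    "top_p": 0.9,  # explicitly set -> used as-is
})

sampling_params = req.to_sampling_params(
    max_tokens=128,
    logits_processor_pattern=None,
    default_sampling_params={"temperature": 0.6},  # illustrative server default
)
# temperature falls back to 0.6, top_p stays 0.9, and top_k/min_p use the
# _DEFAULT_SAMPLING_PARAMS fallbacks shown above.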

validate_stream_options classmethod

validate_stream_options(data)
Source code in vllm/entrypoints/openai/chat_completion/protocol.py
@model_validator(mode="before")
@classmethod
def validate_stream_options(cls, data):
    if data.get("stream_options") and not data.get("stream"):
        raise VLLMValidationError(
            "Stream options can only be defined when `stream=True`.",
            parameter="stream_options",
        )

    return data
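
So `stream_options` is only accepted together with `stream=true`, for example (the include_usage field is assumed to exist on StreamOptions, matching the usual OpenAI shape):

from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest

req = ChatCompletionRequest.model_validate({
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": True,
    "stream_options": {"include_usage": True},  # assumed StreamOptions field
})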

ChatCompletionResponse

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/openai/chat_completion/protocol.py
class ChatCompletionResponse(OpenAIBaseModel):
    id: str = Field(default_factory=lambda: f"chatcmpl-{random_uuid()}")
    object: Literal["chat.completion"] = "chat.completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: list[ChatCompletionResponseChoice]
    service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None
    system_fingerprint: str | None = None
    usage: UsageInfo

    # vLLM-specific fields that are not in OpenAI spec
    prompt_logprobs: list[dict[int, Logprob] | None] | None = None
    prompt_token_ids: list[int] | None = None
    kv_transfer_params: dict[str, Any] | None = Field(
        default=None, description="KVTransfer parameters."
    )

choices instance-attribute

choices: list[ChatCompletionResponseChoice]

created class-attribute instance-attribute

created: int = Field(default_factory=lambda: int(time()))

id class-attribute instance-attribute

id: str = Field(
    default_factory=lambda: f"chatcmpl-{random_uuid()}"
)

kv_transfer_params class-attribute instance-attribute

kv_transfer_params: dict[str, Any] | None = Field(
    default=None, description="KVTransfer parameters."
)

model instance-attribute

model: str

object class-attribute instance-attribute

object: Literal['chat.completion'] = 'chat.completion'

prompt_logprobs class-attribute instance-attribute

prompt_logprobs: list[dict[int, Logprob] | None] | None = (
    None
)

prompt_token_ids class-attribute instance-attribute

prompt_token_ids: list[int] | None = None

service_tier class-attribute instance-attribute

service_tier: (
    Literal["auto", "default", "flex", "scale", "priority"]
    | None
) = None

system_fingerprint class-attribute instance-attribute

system_fingerprint: str | None = None

usage instance-attribute

usage: UsageInfo

ChatCompletionResponseChoice

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/openai/chat_completion/protocol.py
class ChatCompletionResponseChoice(OpenAIBaseModel):
    index: int
    message: ChatMessage
    logprobs: ChatCompletionLogProbs | None = None
    # per OpenAI spec this is the default
    finish_reason: str | None = "stop"
    # not part of the OpenAI spec but included in vLLM for legacy reasons
    stop_reason: int | str | None = None
    # not part of the OpenAI spec but is useful for tracing the tokens
    # in agent scenarios
    token_ids: list[int] | None = None

finish_reason class-attribute instance-attribute

finish_reason: str | None = 'stop'

index instance-attribute

index: int

logprobs class-attribute instance-attribute

logprobs: ChatCompletionLogProbs | None = None

message instance-attribute

message: ChatMessage

stop_reason class-attribute instance-attribute

stop_reason: int | str | None = None

token_ids class-attribute instance-attribute

token_ids: list[int] | None = None

ChatCompletionResponseStreamChoice

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/openai/chat_completion/protocol.py
class ChatCompletionResponseStreamChoice(OpenAIBaseModel):
    index: int
    delta: DeltaMessage
    logprobs: ChatCompletionLogProbs | None = None
    finish_reason: str | None = None
    stop_reason: int | str | None = None
    # not part of the OpenAI spec but for tracing the tokens
    token_ids: list[int] | None = None

delta instance-attribute

delta: DeltaMessage

finish_reason class-attribute instance-attribute

finish_reason: str | None = None

index instance-attribute

index: int

logprobs class-attribute instance-attribute

logprobs: ChatCompletionLogProbs | None = None

stop_reason class-attribute instance-attribute

stop_reason: int | str | None = None

token_ids class-attribute instance-attribute

token_ids: list[int] | None = None

ChatCompletionStreamResponse

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/openai/chat_completion/protocol.py
class ChatCompletionStreamResponse(OpenAIBaseModel):
    id: str = Field(default_factory=lambda: f"chatcmpl-{random_uuid()}")
    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: list[ChatCompletionResponseStreamChoice]
    usage: UsageInfo | None = Field(default=None)
    # not part of the OpenAI spec but for tracing the tokens
    prompt_token_ids: list[int] | None = None

choices instance-attribute

choices: list[ChatCompletionResponseStreamChoice]

created class-attribute instance-attribute

created: int = Field(default_factory=lambda: int(time()))

id class-attribute instance-attribute

id: str = Field(
    default_factory=lambda: f"chatcmpl-{random_uuid()}"
)

model instance-attribute

model: str

object class-attribute instance-attribute

object: Literal["chat.completion.chunk"] = (
    "chat.completion.chunk"
)

prompt_token_ids class-attribute instance-attribute

prompt_token_ids: list[int] | None = None

usage class-attribute instance-attribute

usage: UsageInfo | None = Field(default=None)

ChatCompletionToolsParam

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/openai/chat_completion/protocol.py
class ChatCompletionToolsParam(OpenAIBaseModel):
    type: Literal["function"] = "function"
    function: FunctionDefinition

function instance-attribute

function: FunctionDefinition

type class-attribute instance-attribute

type: Literal['function'] = 'function'

ChatMessage

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/openai/chat_completion/protocol.py
class ChatMessage(OpenAIBaseModel):
    role: str
    content: str | None = None
    refusal: str | None = None
    annotations: OpenAIAnnotation | None = None
    audio: OpenAIChatCompletionAudio | None = None
    function_call: FunctionCall | None = None
    tool_calls: list[ToolCall] = Field(default_factory=list)

    # vLLM-specific fields that are not in OpenAI spec
    reasoning: str | None = None
    reasoning_content: str | None = None
    """Deprecated: use `reasoning` instead."""

    @model_validator(mode="after")
    def handle_deprecated_reasoning_content(self):
        """Copy reasoning to reasoning_content for backward compatibility."""
        self.reasoning_content = self.reasoning
        return self

annotations class-attribute instance-attribute

annotations: Annotation | None = None

audio class-attribute instance-attribute

audio: ChatCompletionAudio | None = None

content class-attribute instance-attribute

content: str | None = None

function_call class-attribute instance-attribute

function_call: FunctionCall | None = None

reasoning class-attribute instance-attribute

reasoning: str | None = None

reasoning_content class-attribute instance-attribute

reasoning_content: str | None = None

Deprecated: use reasoning instead.

refusal class-attribute instance-attribute

refusal: str | None = None

role instance-attribute

role: str

tool_calls class-attribute instance-attribute

tool_calls: list[ToolCall] = Field(default_factory=list)

handle_deprecated_reasoning_content

handle_deprecated_reasoning_content()

Copy reasoning to reasoning_content for backward compatibility.

Source code in vllm/entrypoints/openai/chat_completion/protocol.py
@model_validator(mode="after")
def handle_deprecated_reasoning_content(self):
    """Copy reasoning to reasoning_content for backward compatibility."""
    self.reasoning_content = self.reasoning
    return self
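
A sketch of the backward-compatibility behavior: after validation, `reasoning_content` always mirrors `reasoning`.

from vllm.entrypoints.openai.chat_completion.protocol import ChatMessage

msg = ChatMessage(role="assistant", content="42", reasoning="Chain of thought...")
assert msg.reasoning_content == msg.reasoning  # copied by the validator above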