vllm.config.model_arch ¶
ModelArchitectureConfig ¶
Configuration for the model architecture that is required by the vLLM runtime.
Source code in vllm/config/model_arch.py
architectures instance-attribute ¶
List of model architecture class names (e.g., ['LlamaForCausalLM']). It may be None after calling vllm_config.with_hf_config(config.text_config).
derived_max_model_len_and_key instance-attribute ¶
The maximum model length and the HF config key it was derived from.
is_deepseek_mla instance-attribute ¶
is_deepseek_mla: bool
Whether the model is a DeepSeek MLA model.
quantization_config instance-attribute ¶
Quantization configuration dictionary containing quantization parameters.
text_model_type instance-attribute ¶
text_model_type: str | None
Text model type identifier (e.g., 'llama4_text').
total_num_attention_heads instance-attribute ¶
total_num_attention_heads: int
Total number of attention heads in the model.
total_num_hidden_layers instance-attribute ¶
total_num_hidden_layers: int
Total number of hidden layers in the model.