Skip to content

vllm.config.model_arch

logger module-attribute

logger = init_logger(__name__)

ModelArchitectureConfig

Configuration for the model architecture, as required by the vLLM runtime

Source code in vllm/config/model_arch.py
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
class ModelArchitectureConfig:
    """
    Configuration for model architecture that required by vLLM runtime
    """

    architectures: list[str] | None
    """List of model architecture class names (e.g., ['LlamaForCausalLM']).
       It can be None upon calling `vllm_config.with_hf_config(config.text_config)`"""

    model_type: str
    """Model type identifier (e.g., 'llama', 'gpt_oss')."""

    text_model_type: str | None
    """Text model type identifier (e.g., 'llama4_text')."""

    hidden_size: int
    """Hidden size of the model."""

    total_num_hidden_layers: int
    """Number of hidden layers in the model."""

    total_num_attention_heads: int
    """Number of attention heads in the model."""

    head_size: int
    """Head dimension of the model."""

    vocab_size: int
    """Vocabulary size of the model."""

    total_num_kv_heads: int
    """Number of key value heads in the model."""

    num_experts: int
    """Number of experts in the model."""

    quantization_config: dict[str, Any] | None
    """Quantization configuration dictionary containing quantization parameters."""

    is_deepseek_mla: bool
    """Whether the model is a DeepSeek MLA model."""

    derived_max_model_len_and_key: tuple[float, str | None]
    """Derived maximum model length and key from the hf config."""

architectures instance-attribute

architectures: list[str] | None

List of model architecture class names (e.g., ['LlamaForCausalLM']). It can be None upon calling vllm_config.with_hf_config(config.text_config)

derived_max_model_len_and_key instance-attribute

derived_max_model_len_and_key: tuple[float, str | None]

Derived maximum model length and key from the hf config.

head_size instance-attribute

head_size: int

Head dimension of the model.

hidden_size instance-attribute

hidden_size: int

Hidden size of the model.

is_deepseek_mla instance-attribute

is_deepseek_mla: bool

Whether the model is a DeepSeek MLA model.

model_type instance-attribute

model_type: str

Model type identifier (e.g., 'llama', 'gpt_oss').

num_experts instance-attribute

num_experts: int

Number of experts in the model.

quantization_config instance-attribute

quantization_config: dict[str, Any] | None

Quantization configuration dictionary containing quantization parameters.

text_model_type instance-attribute

text_model_type: str | None

Text model type identifier (e.g., 'llama4_text').

total_num_attention_heads instance-attribute

total_num_attention_heads: int

Number of attention heads in the model.

total_num_hidden_layers instance-attribute

total_num_hidden_layers: int

Number of hidden layers in the model.

total_num_kv_heads instance-attribute

total_num_kv_heads: int

Total number of key-value heads in the model.

vocab_size instance-attribute

vocab_size: int

Vocabulary size of the model.