vllm.model_executor.models.modernbert
ModernBertAttention
Bases: Module
Wo instance-attribute
Wo = RowParallelLinear(
    hidden_size,
    hidden_size,
    bias=attention_bias,
    prefix=f"{prefix}.Wo",
)
Wqkv instance-attribute
Wqkv = QKVParallelLinear(
    hidden_size,
    head_dim,
    num_heads,
    bias=attention_bias,
    prefix=f"{prefix}.Wqkv",
)
attn instance-attribute
attn = EncoderOnlyAttention(
    num_heads,
    head_dim,
    scaling,
    prefix=f"{layer_id}.attn",
    per_layer_sliding_window=sliding_window,
)
rotary_emb instance-attribute
rotary_emb = get_rope(
    head_size=head_dim,
    max_position=max_position_embeddings,
    rope_parameters=rope_parameters,
    dtype=float16,
)
__init__
forward
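For orientation, these pieces compose as follows: Wqkv projects hidden states to query/key/value, rotary_emb rotates q/k by position, attn runs scaled dot-product attention, and Wo projects back to hidden_size. A minimal sketch with plain PyTorch stand-ins; the nn.Linear layers, shapes, and explicit softmax attention are illustrative assumptions, not the vLLM parallel layers or kernels:

```python
# Illustrative sketch only: plain nn.Linear stands in for QKVParallelLinear /
# RowParallelLinear, and explicit softmax attention stands in for EncoderOnlyAttention.
import torch
import torch.nn as nn

hidden_size, num_heads = 768, 12            # assumed ModernBERT-base-like sizes
head_dim = hidden_size // num_heads

Wqkv = nn.Linear(hidden_size, 3 * hidden_size, bias=False)
Wo = nn.Linear(hidden_size, hidden_size, bias=False)

x = torch.randn(16, hidden_size)            # (num_tokens, hidden_size)
q, k, v = Wqkv(x).split(hidden_size, dim=-1)
# rotary_emb(positions, q, k) would rotate q and k here, before attention.
q = q.view(-1, num_heads, head_dim)
k = k.view(-1, num_heads, head_dim)
v = v.view(-1, num_heads, head_dim)
scores = torch.einsum("qhd,khd->hqk", q, k) * head_dim**-0.5   # scaling = head_dim ** -0.5
out = torch.einsum("hqk,khd->qhd", scores.softmax(dim=-1), v)
out = Wo(out.reshape(-1, hidden_size))      # back to (num_tokens, hidden_size)
```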
ModernBertEmbeddings
Bases: Module
tok_embeddings instance-attribute
tok_embeddings = VocabParallelEmbedding(
    vocab_size, hidden_size
)
__init__
embed_input_ids
forward
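The embedding module maps token ids to hidden vectors via tok_embeddings. A hedged sketch with nn.Embedding standing in for VocabParallelEmbedding; the sizes are assumptions, and any normalization or dropout the module applies afterwards is omitted:

```python
# Sketch of the token-embedding lookup; nn.Embedding stands in for VocabParallelEmbedding.
import torch
import torch.nn as nn

vocab_size, hidden_size = 50368, 768        # assumed ModernBERT-base-like sizes
tok_embeddings = nn.Embedding(vocab_size, hidden_size)

input_ids = torch.tensor([101, 2023, 2003, 102])   # (num_tokens,)
inputs_embeds = tok_embeddings(input_ids)          # (num_tokens, hidden_size)
```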
ModernBertEncoderLayer
Bases: Module
layers instance-attribute
layers = ModuleList(
    [
        ModernBertLayer(
            config=config,
            layer_id=layer_id,
            prefix=f"{prefix}.layers.{layer_id}",
        )
        for layer_id in range(num_hidden_layers)
    ]
)
__init__
__init__(vllm_config: VllmConfig, prefix: str = '')
forward
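The encoder threads the hidden states through each ModernBertLayer in order. A minimal sketch of that loop; LayerStub is a hypothetical placeholder whose signature differs from the real layer:

```python
# Sketch of the stacked-layer forward loop; LayerStub is a hypothetical stand-in
# for ModernBertLayer.
import torch
import torch.nn as nn

class LayerStub(nn.Module):
    def __init__(self, hidden_size: int) -> None:
        super().__init__()
        self.proj = nn.Linear(hidden_size, hidden_size)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return hidden_states + self.proj(hidden_states)

num_hidden_layers, hidden_size = 4, 768
layers = nn.ModuleList(LayerStub(hidden_size) for _ in range(num_hidden_layers))

hidden_states = torch.randn(16, hidden_size)
for layer in layers:
    hidden_states = layer(hidden_states)
```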
ModernBertForSequenceClassification
Bases: Module, SupportsCrossEncoding
model instance-attribute
model = ModernBertModel(
    vllm_config=vllm_config,
    prefix=maybe_prefix(prefix, "modernbert"),
)
pooler instance-attribute
pooler = for_seq_cls(
    pooler_config, pooling=pooling, classifier=classifier
)
__init__
__init__(*, vllm_config: VllmConfig, prefix: str = '')
embed_input_ids
forward
forward(
    input_ids: LongTensor | None,
    positions: Tensor,
    intermediate_tensors: IntermediateTensors | None = None,
    inputs_embeds: Tensor | None = None,
) -> Tensor
load_weights
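Because the class implements SupportsCrossEncoding, ModernBERT cross-encoders can be run through vLLM's offline pooling API. A usage sketch; the model name is only an example, and the scoring call and output fields should be checked against your vLLM version:

```python
# Usage sketch (unverified against a specific vLLM release): score a query/document
# pair with a ModernBERT-based cross-encoder through the offline LLM API.
from vllm import LLM

llm = LLM(model="Alibaba-NLP/gte-reranker-modernbert-base")   # example ModernBERT reranker
outputs = llm.score(
    "What is the capital of France?",
    "Paris is the capital and largest city of France.",
)
print(outputs[0].outputs.score)
```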
ModernBertForTokenClassification
Bases: Module
model instance-attribute
model = ModernBertModel(
    vllm_config=vllm_config,
    prefix=maybe_prefix(prefix, "modernbert"),
)
__init__
__init__(*, vllm_config: VllmConfig, prefix: str = '')
embed_input_ids
forward
forward(
    input_ids: Tensor | None,
    positions: Tensor,
    intermediate_tensors: IntermediateTensors | None = None,
    inputs_embeds: Tensor | None = None,
) -> Tensor
load_weights
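Token classification produces one logit vector per token on top of the encoder output. A hedged sketch; num_labels and the plain Linear classifier are illustrative assumptions, not the module's actual head:

```python
# Sketch: per-token classification over encoder hidden states.
import torch
import torch.nn as nn

hidden_size, num_labels, num_tokens = 768, 9, 16    # assumed sizes
classifier = nn.Linear(hidden_size, num_labels)     # assumed classifier head

hidden_states = torch.randn(num_tokens, hidden_size)   # encoder output, one row per token
logits = classifier(hidden_states)                     # (num_tokens, num_labels)
predicted = logits.argmax(dim=-1)
```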
ModernBertLayer
Bases: Module
attn instance-attribute
attn = ModernBertAttention(
    config=config,
    layer_id=layer_id,
    prefix=f"{prefix}.attn",
)
__init__
forward
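ModernBERT layers are pre-norm residual blocks: normalize, run the sub-module, add back the residual, once for attention and once for the MLP. A sketch with simple stand-ins; the exact norm placement and sub-module internals are assumptions here:

```python
# Sketch of a pre-norm residual block: x += attn(attn_norm(x)); x += mlp(mlp_norm(x)).
# nn.Linear stands in for ModernBertAttention and ModernBertMLP.
import torch
import torch.nn as nn

hidden_size = 768
attn_norm = nn.LayerNorm(hidden_size)
mlp_norm = nn.LayerNorm(hidden_size)
attn = nn.Linear(hidden_size, hidden_size)
mlp = nn.Linear(hidden_size, hidden_size)

hidden_states = torch.randn(16, hidden_size)
hidden_states = hidden_states + attn(attn_norm(hidden_states))
hidden_states = hidden_states + mlp(mlp_norm(hidden_states))
```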
ModernBertMLP
Bases: Module
Wo instance-attribute
Wo = RowParallelLinear(
    intermediate_size,
    hidden_size,
    bias=mlp_bias,
    prefix=f"{prefix}.Wo",
)
__init__
__init__(config: ModernBertConfig, prefix: str = '')
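Only the down-projection Wo is listed above; ModernBERT's MLP is a gated (GeGLU-style) unit in which an up-projection produces both an input and a gate stream before Wo maps back to hidden_size. A sketch under that assumption, where Wi, the GELU activation, and the sizes are all assumptions:

```python
# Sketch of a GeGLU-style MLP: Wi -> chunk into (up, gate) -> act(up) * gate -> Wo.
import torch
import torch.nn as nn

hidden_size, intermediate_size = 768, 1152          # assumed ModernBERT-base-like sizes
Wi = nn.Linear(hidden_size, 2 * intermediate_size, bias=False)   # assumed up-projection
Wo = nn.Linear(intermediate_size, hidden_size, bias=False)
act = nn.GELU()

x = torch.randn(16, hidden_size)
up, gate = Wi(x).chunk(2, dim=-1)
y = Wo(act(up) * gate)                              # back to (16, hidden_size)
```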
ModernBertModel
Bases: Module
encoder_layer instance-attribute
encoder_layer = ModernBertEncoderLayer(
    vllm_config, prefix=f"{prefix}.encoder_layer"
)
hf_to_vllm_mapper class-attribute instance-attribute
hf_to_vllm_mapper = WeightsMapper(
    orig_to_new_prefix={"layers.": "encoder_layer.layers."}
)
__init__
__init__(vllm_config: VllmConfig, prefix: str = '')
embed_input_ids
forward
forward(
    input_ids: Tensor,
    positions: Tensor,
    intermediate_tensors: IntermediateTensors | None = None,
    inputs_embeds: Tensor | None = None,
) -> Tensor
load_weights
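hf_to_vllm_mapper renames Hugging Face checkpoint prefixes so they match this module hierarchy, where the stacked layers live under encoder_layer. A plain-Python illustration of the same renaming rule; this mimics the mapping, it is not the WeightsMapper API:

```python
# Illustration of orig_to_new_prefix={"layers.": "encoder_layer.layers."} applied
# to Hugging Face weight names; not the WeightsMapper API itself.
orig_to_new_prefix = {"layers.": "encoder_layer.layers."}

def remap(name: str) -> str:
    for old, new in orig_to_new_prefix.items():
        if name.startswith(old):
            return new + name[len(old):]
    return name

assert remap("layers.0.attn.Wqkv.weight") == "encoder_layer.layers.0.attn.Wqkv.weight"
assert remap("embeddings.tok_embeddings.weight") == "embeddings.tok_embeddings.weight"
```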
ModernBertPooler
Bases: SequencePooler
dense instance-attribute
dense = Linear(
    hidden_size,
    hidden_size,
    classifier_bias,
    dtype=head_dtype,
)
head instance-attribute
head = EmbeddingPoolerHead(
    head_dtype=head_dtype,
    projector=lambda x: dense(x),
    activation=LambdaPoolerActivation(
        lambda x: norm(act(x))
    ),
)
norm instance-attribute
norm = LayerNorm(
    hidden_size,
    eps=norm_eps,
    bias=norm_bias,
    dtype=head_dtype,
)
__init__
__init__(model_config: ModelConfig)
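Putting the attributes together, the pooler head transforms a pooled hidden state as norm(act(dense(x))). A sketch of that transformation, where GELU is an assumption for act:

```python
# Sketch of the pooler head transformation implied above: dense -> act -> norm.
import torch
import torch.nn as nn

hidden_size = 768
dense = nn.Linear(hidden_size, hidden_size)
act = nn.GELU()                        # assumed activation
norm = nn.LayerNorm(hidden_size)

pooled = torch.randn(1, hidden_size)   # pooled sequence representation
out = norm(act(dense(pooled)))
```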
ModernBertPredictionHead
Bases: Module
norm instance-attribute
norm = LayerNorm(
    hidden_size,
    eps=getattr(config, "norm_eps", 1e-05),
    bias=getattr(config, "norm_bias", True),
)