vllm.benchmarks.sweep.serve_sla ¶
SLAHistory ¶
Source code in vllm/benchmarks/sweep/serve_sla.py
SweepServeSLAArgs dataclass ¶
Bases: SweepServeArgs
Source code in vllm/benchmarks/sweep/serve_sla.py
parser_help class-attribute ¶
parser_help: str = (
    "Tune a variable to meet SLAs under multiple settings."
)
__init__ ¶
__init__(
    serve_cmd: list[str],
    bench_cmd: list[str],
    after_bench_cmd: list[str],
    show_stdout: bool,
    serve_params: ParameterSweep,
    bench_params: ParameterSweep,
    output_dir: Path,
    num_runs: int,
    dry_run: bool,
    resume: str | None,
    link_vars: list[tuple[str, str]] | None,
    sla_params: SLASweep,
    sla_variable: SLAVariable,
) -> None
add_cli_args classmethod ¶
add_cli_args(parser: ArgumentParser) -> ArgumentParser
Source code in vllm/benchmarks/sweep/serve_sla.py
from_cli_args classmethod ¶
from_cli_args(args: Namespace)
Source code in vllm/benchmarks/sweep/serve_sla.py
_compute_margin ¶
_compute_margin(
    sla_comb: SLASweepItem,
    iter_data: list[dict[str, object]],
)
Source code in vllm/benchmarks/sweep/serve_sla.py
_get_sla_base_path ¶
_get_sla_base_path(
    output_dir: Path,
    serve_comb: ParameterSweepItem,
    bench_comb: ParameterSweepItem,
)
Source code in vllm/benchmarks/sweep/serve_sla.py
_get_sla_iter_path ¶
_get_sla_iter_path(
    base_path: Path,
    sla_comb: SLASweepItem,
    sla_variable: str,
    sla_value: int | None,
)
Source code in vllm/benchmarks/sweep/serve_sla.py
_get_sla_run_path ¶
_iter_sla_val_paths ¶
Source code in vllm/benchmarks/sweep/serve_sla.py
_sla_needs_server ¶
_sla_needs_server(
    serve_comb: ParameterSweepItem,
    bench_combs: ParameterSweep,
    sla_combs: SLASweep,
    sla_variable: str,
    output_dir: Path,
)
Source code in vllm/benchmarks/sweep/serve_sla.py
run_main ¶
run_main(args: SweepServeSLAArgs)
Source code in vllm/benchmarks/sweep/serve_sla.py
run_sla ¶
run_sla(
    server: ServerProcess | None,
    bench_cmd: list[str],
    *,
    serve_comb: ParameterSweepItem,
    bench_comb: ParameterSweepItem,
    iter_path: Path,
    num_runs: int,
    dry_run: bool,
)
Source code in vllm/benchmarks/sweep/serve_sla.py
run_slas ¶
run_slas(
    serve_cmd: list[str],
    bench_cmd: list[str],
    after_bench_cmd: list[str],
    *,
    show_stdout: bool,
    serve_params: ParameterSweep,
    bench_params: ParameterSweep,
    sla_params: SLASweep,
    sla_variable: SLAVariable,
    output_dir: Path,
    num_runs: int,
    dry_run: bool,
)
Source code in vllm/benchmarks/sweep/serve_sla.py
search_sla ¶
search_sla(
    server: ServerProcess | None,
    bench_cmd: list[str],
    *,
    serve_comb: ParameterSweepItem,
    bench_comb: ParameterSweepItem,
    sla_comb: SLASweepItem,
    sla_variable: SLAVariable,
    base_path: Path,
    num_runs: int,
    dry_run: bool,
)
Source code in vllm/benchmarks/sweep/serve_sla.py
solve_sla ¶
solve_sla(
    server: ServerProcess | None,
    bench_cmd: list[str],
    *,
    serve_comb: ParameterSweepItem,
    bench_comb: ParameterSweepItem,
    sla_comb: SLASweepItem,
    base_path: Path,
    num_runs: int,
    dry_run: bool,
    sla_variable: SLAVariable,
    sla_min_value: int = 1,
    sla_max_value: int = 8192,
)