# Commands

CLI commands for the Agent Track evaluation.
## Basic Usage

```bash
cd agent_bench
# Single operator
bash test_ops.sh add --device-count 1
# Multiple operators (comma-separated, no spaces)
bash test_ops.sh add,softmax,mul --device-count 4
# All operators (no operator list given)
bash test_ops.sh --device-count 8
```
## Method Selection

```bash
# Naive method (single call)
bash test_ops.sh add -m naive_cc --device-count 1
# Normal method (with verification loop)
bash test_ops.sh add -m normal_cc --device-count 1
# OpenCode methods
bash test_ops.sh add -m naive_opencode --device-count 1
bash test_ops.sh add -m normal_opencode --device-count 1
```
## Specialized Agents

```bash
# AutoKernel
bash test_autokernel.sh add --device-count 1
# AKO4ALL
bash test_ako4all.sh add --device-count 1
# CUDA Optimized Skill
bash test_cuda_optimized_skill.sh add --device-count 1
```
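## Parameters

| Parameter | Default | Description |
|---|---|---|
| operator list (positional) | All | Comma-separated operator names |
| *(name missing)* | | Dataset to use |
| `-m` | | Agent method |
| `--device-count` | 8 | Number of GPUs |
| *(name missing)* | 600 | Timeout per operator (seconds) |
| *(name missing)* | Off | Skip prompt generation |
| *(name missing)* | Off | Skip verification |
| *(name missing)* | Off | Enable verbose output |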
## Output

Results are saved to `agent_bench/runs/<run_name>/`:

| File | Description |
|---|---|
| `progress.json` | Real-time progress tracking |
| *(name missing)* | Generated kernel files |
| *(name missing)* | Verification results |
| *(name missing)* | Execution logs |
## Monitoring Progress

```bash
# Watch progress file
cat agent_bench/runs/<run_name>/progress.json
```
## Analyzing Results

```bash
python scripts/analyze/analyze.py agent_bench/runs/<run_name>/
```