137 lines
5.1 KiB
Bash
137 lines
5.1 KiB
Bash
|
|
#!/usr/bin/env bash
|
||
|
|
# Strict mode: abort on command failure, unset variables, and failed pipelines.
set -euo pipefail

# ──────────────────────────────────────────────────────────────
# qa-improve.sh — Score the codebase, then auto-fix what's found
#
# Runs two agents across three phases:
#   1. quality-scorer → produces a QA report in docs/
#   2. qa-improver    → reads the report and fixes mechanical issues
#   3. quality-scorer → re-scores to measure improvement
#
# Usage:
#   ./scripts/qa-improve.sh                 # default: sonnet
#   ./scripts/qa-improve.sh --model opus    # use opus for deeper analysis
#   ./scripts/qa-improve.sh --budget 3.00   # per-agent spend cap in USD (default: 1.50)
#   ./scripts/qa-improve.sh --score-only    # score only; skip the fix and re-score phases
#   ./scripts/qa-improve.sh --fix-only      # skip initial scoring (use latest report)
# ──────────────────────────────────────────────────────────────

# Repository root: the parent of the directory that holds this script.
PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"

# Defaults; each can be overridden from the command line.
MODEL="sonnet"
BUDGET="1.50"
SKIP_SCORE=false
SKIP_FIX=false

#######################################
# Print the help text.
# Outputs: usage summary on stdout
#######################################
usage() {
  printf '%s\n' \
    "Usage: $0 [--model sonnet|opus|haiku] [--budget USD] [--score-only] [--fix-only]" \
    "" \
    "Score the codebase, auto-fix issues, then re-score to measure improvement." \
    "" \
    "Options:" \
    " --model MODEL Claude model (default: sonnet)" \
    " --budget USD Max spend per agent run (default: 1.50)" \
    " --score-only Run scoring only, skip auto-fix" \
    " --fix-only Skip initial score, fix based on latest report"
}

#######################################
# Abort unless the option in $1 is followed by a value.
# Arguments: the remaining command-line words, option first
# Outputs:   error message on stderr when the value is missing
#######################################
require_value() {
  if [[ $# -lt 2 ]]; then
    # Without this guard, "$2" trips `set -u` with an opaque
    # "unbound variable" message instead of a usable error.
    echo "Error: $1 requires a value." >&2
    exit 1
  fi
}

#######################################
# Parse command-line options into the configuration globals.
# Globals:   MODEL, BUDGET, SKIP_SCORE, SKIP_FIX (written)
# Arguments: the script's command-line words
# Outputs:   help text on stdout (--help); errors on stderr
# Returns:   exits 0 after --help, exits 1 on invalid usage
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --model)
        require_value "$@"
        MODEL="$2"
        shift 2
        ;;
      --budget)
        require_value "$@"
        BUDGET="$2"
        shift 2
        ;;
      --score-only)
        SKIP_FIX=true
        shift
        ;;
      --fix-only)
        SKIP_SCORE=true
        shift
        ;;
      --help | -h)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done

  # Both skip flags together would disable every phase; fail loudly
  # rather than exit successfully having done nothing.
  if [[ "$SKIP_SCORE" == true && "$SKIP_FIX" == true ]]; then
    echo "Error: --score-only and --fix-only cannot be combined (nothing would run)." >&2
    exit 1
  fi
}

parse_args "$@"
|
||
|
|
|
||
|
|
# Everything below uses paths relative to the repository root.
cd "$PROJECT_ROOT"

# Verify prerequisites: both CLIs must be resolvable before any agent runs.
for required_tool in claude uv; do
  command -v "$required_tool" >/dev/null 2>&1 && continue
  echo "Error: $required_tool not found." >&2
  exit 1
done

# The scoring prompt points the agent at this file, so stop early without it.
[[ -f "docs/QA-INSTRUCTIONS.md" ]] || {
  echo "Error: docs/QA-INSTRUCTIONS.md not found." >&2
  exit 1
}

# Install the project's dev dependencies quietly before invoking the agents.
uv sync --dev --quiet
|
||
|
|
|
||
|
|
# Prompt for the quality-scorer agent. Built one sentence at a time; the
# result is a single line with the sentences separated by one space.
SCORE_PROMPT="Run a full codebase quality assessment."
SCORE_PROMPT+=" Read docs/QA-INSTRUCTIONS.md for the methodology and rubrics."
SCORE_PROMPT+=" Read docs/QA-TEMPLATE.md for the report structure."
SCORE_PROMPT+=" Check docs/ for previous QA-*.md reports and compute deltas if any exist."
SCORE_PROMPT+=" Collect all raw metrics by running every command in Step 1."
SCORE_PROMPT+=" Score each dimension using the Step 2 rubrics."
SCORE_PROMPT+=" Compute the composite score using the Step 3 formula."
SCORE_PROMPT+=" Write the completed report to docs/QA-<datetime>.md."
SCORE_PROMPT+=" Print the composite score, grade, per-dimension scores, and top 3 actions."

# Prompt for the qa-improver agent, assembled the same way.
FIX_PROMPT="Read the most recent QA report in docs/ (the QA-*.md file with the latest date,"
FIX_PROMPT+=" excluding QA-TEMPLATE.md and QA-INSTRUCTIONS.md)."
FIX_PROMPT+=" Extract the Recommended Actions table."
FIX_PROMPT+=" Execute all auto-fixable and mechanical fixes:"
FIX_PROMPT+=" 1) Run uv run ruff check --fix src/ for lint auto-fixes."
FIX_PROMPT+=" 2) Fix remaining lint violations (duplicate keys, unused vars, long lines)."
FIX_PROMPT+=" 3) Fix mypy type-arg errors by adding proper generic parameters."
FIX_PROMPT+=" 4) Add __all__ exports to any modules missing them."
FIX_PROMPT+=" 5) Verify after each category with ruff check and pytest."
FIX_PROMPT+=" Never change runtime behavior. Never modify test files."
FIX_PROMPT+=" Report what was fixed and what was skipped."
|
||
|
|
|
||
|
|
#######################################
# Print the path of the most recently modified docs/QA-2*.md report.
# Prints an empty line and still succeeds when no report exists, so
# callers running under `set -e -o pipefail` can fall back to a default
# instead of being terminated (as `ls ... | head -1` would cause).
# Outputs: report path (or an empty line) on stdout
# Returns: 0
#######################################
latest_report() {
  local candidate newest=""
  for candidate in docs/QA-2*.md; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -f "$candidate" ]] || continue
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest=$candidate
    fi
  done
  printf '%s\n' "$newest"
}

#######################################
# Run one Claude agent non-interactively with the shared settings.
# Globals:   MODEL, BUDGET (read)
# Arguments: $1 - agent name
#            $2 - allowed tools, as a single space-separated string
#            $3 - prompt text
# Outputs:   the agent's text output on stdout
# Returns:   exit status of the claude CLI
#######################################
run_agent() {
  local agent=$1 tools=$2 prompt=$3
  claude -p \
    --agent "$agent" \
    --model "$MODEL" \
    --max-budget-usd "$BUDGET" \
    --allowedTools "$tools" \
    --output-format text \
    "$prompt"
}

# ── Phase 1: Score ──
if [[ "$SKIP_SCORE" == false ]]; then
  echo "Phase 1: Scoring codebase (model: $MODEL)..."
  echo "────────────────────────────────────────────"

  run_agent quality-scorer "Bash Read Write Grep Glob" "$SCORE_PROMPT"

  REPORT=$(latest_report)
  echo ""
  echo "Report: ${REPORT:-none found}"
  echo ""
fi

# ── Phase 2: Fix ──
if [[ "$SKIP_FIX" == false ]]; then
  echo "Phase 2: Auto-fixing issues (model: $MODEL)..."
  echo "────────────────────────────────────────────"

  # The improver additionally gets Edit so it can modify source files.
  run_agent qa-improver "Bash Read Write Edit Grep Glob" "$FIX_PROMPT"

  echo ""

  # ── Phase 3: Re-score ──
  # Runs only when fixes were attempted, to measure their effect.
  echo "Phase 3: Re-scoring after fixes (model: $MODEL)..."
  echo "────────────────────────────────────────────"

  run_agent quality-scorer "Bash Read Write Grep Glob" "$SCORE_PROMPT"

  FINAL_REPORT=$(latest_report)
  echo ""
  echo "────────────────────────────────────────────"
  echo "Final report: ${FINAL_REPORT:-none found}"
fi
|