QA-Improver

2026-04-11 06:42:24 -04:00
parent 794de9c721
commit 0686224824
75 changed files with 350 additions and 108 deletions
--- a/scripts/qa-improve.sh
+++ b/scripts/qa-improve.sh
@@ -0,0 +1,136 @@
+#!/usr/bin/env bash
+set -euo pipefail  # exit on command failure, unset variables, or a failed pipeline stage
+
+# ──────────────────────────────────────────────────────────────
+# qa-improve.sh — Score the codebase, then auto-fix what's found
+#
+# Runs two agents in three phases:
+#   1. quality-scorer  → produces a QA report in docs/
+#   2. qa-improver     → reads the report and fixes mechanical issues
+#   3. quality-scorer  → re-scores to measure improvement
+#
+# Usage:
+#   ./scripts/qa-improve.sh                # defaults: sonnet, --budget 1.50
+#   ./scripts/qa-improve.sh --model opus   # use opus for deeper analysis
+#   ./scripts/qa-improve.sh --score-only   # run phase 1 only (no fix, no re-score)
+#   ./scripts/qa-improve.sh --fix-only     # skip initial scoring (use latest report)
+# ──────────────────────────────────────────────────────────────
+
+PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+MODEL="sonnet"
+BUDGET="1.50"
+SKIP_SCORE=false
+SKIP_FIX=false
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --model)      MODEL="$2"; shift 2 ;;
+    --budget)     BUDGET="$2"; shift 2 ;;
+    --score-only) SKIP_FIX=true; shift ;;
+    --fix-only)   SKIP_SCORE=true; shift ;;
+    --help|-h)
+      echo "Usage: $0 [--model sonnet|opus|haiku] [--budget USD] [--score-only] [--fix-only]"
+      echo ""
+      echo "Score the codebase, auto-fix issues, then re-score to measure improvement."
+      echo ""
+      echo "Options:"
+      echo "  --model MODEL    Claude model (default: sonnet)"
+      echo "  --budget USD     Max spend per agent run (default: 1.50)"
+      echo "  --score-only     Run scoring only, skip auto-fix"
+      echo "  --fix-only       Skip initial score, fix based on latest report"
+      exit 0
+      ;;
+    *) echo "Unknown option: $1"; exit 1 ;;
+  esac
+done
+
+cd "$PROJECT_ROOT"
+
+# Abort early when a required CLI is missing from PATH.
+for tool in claude uv; do
+  command -v "$tool" &>/dev/null && continue
+  echo "Error: $tool not found." >&2
+  exit 1
+done
+
+# The scoring prompt points the agent at this file, so require it up front.
+[[ -f "docs/QA-INSTRUCTIONS.md" ]] || {
+  echo "Error: docs/QA-INSTRUCTIONS.md not found." >&2
+  exit 1
+}
+
+uv sync --dev --quiet
+
+SCORE_PROMPT="Run a full codebase quality assessment. \
+Read docs/QA-INSTRUCTIONS.md for the methodology and rubrics. \
+Read docs/QA-TEMPLATE.md for the report structure. \
+Check docs/ for previous QA-*.md reports and compute deltas if any exist. \
+Collect all raw metrics by running every command in Step 1. \
+Score each dimension using the Step 2 rubrics. \
+Compute the composite score using the Step 3 formula. \
+Write the completed report to docs/QA-<datetime>.md. \
+Print the composite score, grade, per-dimension scores, and top 3 actions."
+
+FIX_PROMPT="Read the most recent QA report in docs/ (the QA-*.md file with the latest date, \
+excluding QA-TEMPLATE.md and QA-INSTRUCTIONS.md). \
+Extract the Recommended Actions table. \
+Execute all auto-fixable and mechanical fixes: \
+1) Run uv run ruff check --fix src/ for lint auto-fixes. \
+2) Fix remaining lint violations (duplicate keys, unused vars, long lines). \
+3) Fix mypy type-arg errors by adding proper generic parameters. \
+4) Add __all__ exports to any modules missing them. \
+5) Verify after each category with ruff check and pytest. \
+Never change runtime behavior. Never modify test files. \
+Report what was fixed and what was skipped."
+
+# ── Phase 1: Score ──
+if [[ "$SKIP_SCORE" == false ]]; then
+  echo "Phase 1: Scoring codebase (model: $MODEL)..."
+  echo "────────────────────────────────────────────"
+
+  claude -p \
+    --agent quality-scorer \
+    --model "$MODEL" \
+    --max-budget-usd "$BUDGET" \
+    --allowedTools "Bash Read Write Grep Glob" \
+    --output-format text \
+    "$SCORE_PROMPT"
+
+  REPORT=$(ls -t docs/QA-2*.md 2>/dev/null | head -1)
+  echo ""
+  echo "Report: ${REPORT:-none found}"
+  echo ""
+fi
+
+# ── Phase 2: Fix ──
+if [[ "$SKIP_FIX" == false ]]; then
+  echo "Phase 2: Auto-fixing issues (model: $MODEL)..."
+  echo "────────────────────────────────────────────"
+
+  claude -p \
+    --agent qa-improver \
+    --model "$MODEL" \
+    --max-budget-usd "$BUDGET" \
+    --allowedTools "Bash Read Write Edit Grep Glob" \
+    --output-format text \
+    "$FIX_PROMPT"
+
+  echo ""
+
+  # ── Phase 3: Re-score ──
+  echo "Phase 3: Re-scoring after fixes (model: $MODEL)..."
+  echo "────────────────────────────────────────────"
+
+  claude -p \
+    --agent quality-scorer \
+    --model "$MODEL" \
+    --max-budget-usd "$BUDGET" \
+    --allowedTools "Bash Read Write Grep Glob" \
+    --output-format text \
+    "$SCORE_PROMPT"
+
+  FINAL_REPORT=$(ls -t docs/QA-2*.md 2>/dev/null | head -1)
+  echo ""
+  echo "────────────────────────────────────────────"
+  echo "Final report: ${FINAL_REPORT:-none found}"
+fi