scripts/qa-improve.sh

#!/usr/bin/env bash
set -euo pipefail

# ──────────────────────────────────────────────────────────────
# qa-improve.sh — Score the codebase, then auto-fix what's found
#
# Runs two agents across three phases:
#   1. quality-scorer  → produces a QA report in docs/
#   2. qa-improver     → reads the report and fixes mechanical issues
#   3. quality-scorer  → re-scores to measure improvement
#
# Usage:
#   ./scripts/qa-improve.sh                # default: sonnet
#   ./scripts/qa-improve.sh --model opus   # use opus for deeper analysis
#   ./scripts/qa-improve.sh --budget 3.00  # max spend per agent run (default: 1.50)
#   ./scripts/qa-improve.sh --score-only   # skip the improvement step
#   ./scripts/qa-improve.sh --fix-only     # skip initial scoring (use latest report)
# ──────────────────────────────────────────────────────────────

# Absolute path of the directory one level above this script's directory.
PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"

# Defaults; each may be overridden on the command line (see parse_args).
MODEL="sonnet"     # forwarded to `claude --model`
BUDGET="1.50"      # forwarded to `claude --max-budget-usd` on every agent run
SKIP_SCORE=false   # set to true by --fix-only
SKIP_FIX=false     # set to true by --score-only

# Print the help text to stdout.
usage() {
  echo "Usage: $0 [--model sonnet|opus|haiku] [--budget USD] [--score-only] [--fix-only]"
  echo ""
  echo "Score the codebase, auto-fix issues, then re-score to measure improvement."
  echo ""
  echo "Options:"
  echo "  --model MODEL    Claude model (default: sonnet)"
  echo "  --budget USD     Max spend per agent run (default: 1.50)"
  echo "  --score-only     Run scoring only, skip auto-fix"
  echo "  --fix-only       Skip initial score, fix based on latest report"
}

# Parse command-line options into MODEL, BUDGET, SKIP_SCORE and SKIP_FIX.
# Arguments: the script's own arguments ("$@").
# Exits 0 after printing help for --help/-h; exits 1 (message on stderr) for
# an unknown option or for --model/--budget given without a value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --model | --budget)
        # Without this guard a trailing "--model" would expand an unset $2,
        # which under `set -u` aborts with only "unbound variable".
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value." >&2
          exit 1
        fi
        if [[ "$1" == "--model" ]]; then
          MODEL="$2"
        else
          BUDGET="$2"
        fi
        shift 2
        ;;
      --score-only)
        SKIP_FIX=true
        shift
        ;;
      --fix-only)
        SKIP_SCORE=true
        shift
        ;;
      --help | -h)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Everything below uses paths relative to the project root (docs/, src/).
cd "$PROJECT_ROOT"

# Bail out early when a required executable is not on PATH.
for required_tool in claude uv; do
  command -v "$required_tool" &>/dev/null && continue
  echo "Error: $required_tool not found." >&2
  exit 1
done

# The instructions file must exist before any agent is started.
[[ -f "docs/QA-INSTRUCTIONS.md" ]] || {
  echo "Error: docs/QA-INSTRUCTIONS.md not found." >&2
  exit 1
}

# Sync the uv-managed environment, including dev dependencies, quietly.
uv sync --dev --quiet

# Prompt given to the quality-scorer agent. Assembled one sentence at a time;
# every appended piece starts with the single space that separates sentences,
# so the result is one line with no newlines.
SCORE_PROMPT='Run a full codebase quality assessment.'
SCORE_PROMPT+=' Read docs/QA-INSTRUCTIONS.md for the methodology and rubrics.'
SCORE_PROMPT+=' Read docs/QA-TEMPLATE.md for the report structure.'
SCORE_PROMPT+=' Check docs/ for previous QA-*.md reports and compute deltas if any exist.'
SCORE_PROMPT+=' Collect all raw metrics by running every command in Step 1.'
SCORE_PROMPT+=' Score each dimension using the Step 2 rubrics.'
SCORE_PROMPT+=' Compute the composite score using the Step 3 formula.'
SCORE_PROMPT+=' Write the completed report to docs/QA-<datetime>.md.'
SCORE_PROMPT+=' Print the composite score, grade, per-dimension scores, and top 3 actions.'

# Prompt given to the qa-improver agent, assembled the same way.
FIX_PROMPT='Read the most recent QA report in docs/ (the QA-*.md file with the latest date,'
FIX_PROMPT+=' excluding QA-TEMPLATE.md and QA-INSTRUCTIONS.md).'
FIX_PROMPT+=' Extract the Recommended Actions table.'
FIX_PROMPT+=' Execute all auto-fixable and mechanical fixes:'
FIX_PROMPT+=' 1) Run uv run ruff check --fix src/ for lint auto-fixes.'
FIX_PROMPT+=' 2) Fix remaining lint violations (duplicate keys, unused vars, long lines).'
FIX_PROMPT+=' 3) Fix mypy type-arg errors by adding proper generic parameters.'
FIX_PROMPT+=' 4) Add __all__ exports to any modules missing them.'
FIX_PROMPT+=' 5) Verify after each category with ruff check and pytest.'
FIX_PROMPT+=' Never change runtime behavior. Never modify test files.'
FIX_PROMPT+=' Report what was fixed and what was skipped.'

# ── Helpers ──

# Print the path of the most recently modified docs/QA-2*.md file, or nothing
# when no such file exists. Always returns 0, so callers running under
# `set -e` / `pipefail` are not aborted by a missing report (the previous
# `ls -t ... | head -1` pipeline failed in that case and killed the script
# before the "none found" fallback could be printed).
# The QA-2* pattern does not match QA-TEMPLATE.md or QA-INSTRUCTIONS.md.
latest_qa_report() {
  local candidate newest=""
  for candidate in docs/QA-2*.md; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -f "$candidate" ]] || continue
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest="$candidate"
    fi
  done
  printf '%s\n' "$newest"
}

# Invoke `claude -p` for one agent with the shared model and budget settings.
# Globals:   MODEL, BUDGET (read)
# Arguments: $1 - agent name, $2 - space-separated allowed tools, $3 - prompt
# Returns:   the exit status of `claude`
run_agent() {
  local agent="$1" tools="$2" prompt="$3"
  claude -p \
    --agent "$agent" \
    --model "$MODEL" \
    --max-budget-usd "$BUDGET" \
    --allowedTools "$tools" \
    --output-format text \
    "$prompt"
}

# ── Phase 1: Score ──
if [[ "$SKIP_SCORE" == false ]]; then
  echo "Phase 1: Scoring codebase (model: $MODEL)..."
  echo "────────────────────────────────────────────"

  run_agent quality-scorer "Bash Read Write Grep Glob" "$SCORE_PROMPT"

  REPORT="$(latest_qa_report)"
  echo ""
  echo "Report: ${REPORT:-none found}"
  echo ""
fi

# ── Phase 2: Fix ──
if [[ "$SKIP_FIX" == false ]]; then
  echo "Phase 2: Auto-fixing issues (model: $MODEL)..."
  echo "────────────────────────────────────────────"

  # Only this agent is additionally granted the Edit tool.
  run_agent qa-improver "Bash Read Write Edit Grep Glob" "$FIX_PROMPT"

  echo ""

  # ── Phase 3: Re-score ──
  echo "Phase 3: Re-scoring after fixes (model: $MODEL)..."
  echo "────────────────────────────────────────────"

  run_agent quality-scorer "Bash Read Write Grep Glob" "$SCORE_PROMPT"

  FINAL_REPORT="$(latest_qa_report)"
  echo ""
  echo "────────────────────────────────────────────"
  echo "Final report: ${FINAL_REPORT:-none found}"
fi
QA-Improver 2026-04-11 06:42:24 -04:00			`#!/usr/bin/env bash`
			`set -euo pipefail`

			`# ──────────────────────────────────────────────────────────────`
			`# qa-improve.sh — Score the codebase, then auto-fix what's found`
			`#`
			`# Runs two agents in sequence:`
			`# 1. quality-scorer → produces a QA report in docs/`
			`# 2. qa-improver → reads the report and fixes mechanical issues`
			`# 3. quality-scorer → re-scores to measure improvement`
			`#`
			`# Usage:`
			`# ./scripts/qa-improve.sh # default: sonnet`
			`# ./scripts/qa-improve.sh --model opus # use opus for deeper analysis`
			`# ./scripts/qa-improve.sh --score-only # skip the improvement step`
			`# ./scripts/qa-improve.sh --fix-only # skip initial scoring (use latest report)`
			`# ──────────────────────────────────────────────────────────────`

			`PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"`
			`MODEL="sonnet"`
			`BUDGET="1.50"`
			`SKIP_SCORE=false`
			`SKIP_FIX=false`

			`while [[ $# -gt 0 ]]; do`
			`case "$1" in`
			`--model) MODEL="$2"; shift 2 ;;`
			`--budget) BUDGET="$2"; shift 2 ;;`
			`--score-only) SKIP_FIX=true; shift ;;`
			`--fix-only) SKIP_SCORE=true; shift ;;`
			`--help\|-h)`
			`echo "Usage: $0 [--model sonnet\|opus\|haiku] [--budget USD] [--score-only] [--fix-only]"`
			`echo ""`
			`echo "Score the codebase, auto-fix issues, then re-score to measure improvement."`
			`echo ""`
			`echo "Options:"`
			`echo " --model MODEL Claude model (default: sonnet)"`
			`echo " --budget USD Max spend per agent run (default: 1.50)"`
			`echo " --score-only Run scoring only, skip auto-fix"`
			`echo " --fix-only Skip initial score, fix based on latest report"`
			`exit 0`
			`;;`
			`*) echo "Unknown option: $1"; exit 1 ;;`
			`esac`
			`done`

			`cd "$PROJECT_ROOT"`

			`# Verify prerequisites`
			`for cmd in claude uv; do`
			`if ! command -v "$cmd" &>/dev/null; then`
			`echo "Error: $cmd not found." >&2`
			`exit 1`
			`fi`
			`done`

			`if [[ ! -f "docs/QA-INSTRUCTIONS.md" ]]; then`
			`echo "Error: docs/QA-INSTRUCTIONS.md not found." >&2`
			`exit 1`
			`fi`

			`uv sync --dev --quiet`

			`SCORE_PROMPT="Run a full codebase quality assessment. \`
			`Read docs/QA-INSTRUCTIONS.md for the methodology and rubrics. \`
			`Read docs/QA-TEMPLATE.md for the report structure. \`
			`Check docs/ for previous QA-*.md reports and compute deltas if any exist. \`
			`Collect all raw metrics by running every command in Step 1. \`
			`Score each dimension using the Step 2 rubrics. \`
			`Compute the composite score using the Step 3 formula. \`
			`Write the completed report to docs/QA-<datetime>.md. \`
			`Print the composite score, grade, per-dimension scores, and top 3 actions."`

			`FIX_PROMPT="Read the most recent QA report in docs/ (the QA-*.md file with the latest date, \`
			`excluding QA-TEMPLATE.md and QA-INSTRUCTIONS.md). \`
			`Extract the Recommended Actions table. \`
			`Execute all auto-fixable and mechanical fixes: \`
			`1) Run uv run ruff check --fix src/ for lint auto-fixes. \`
			`2) Fix remaining lint violations (duplicate keys, unused vars, long lines). \`
			`3) Fix mypy type-arg errors by adding proper generic parameters. \`
			`4) Add __all__ exports to any modules missing them. \`
			`5) Verify after each category with ruff check and pytest. \`
			`Never change runtime behavior. Never modify test files. \`
			`Report what was fixed and what was skipped."`

			`# ── Phase 1: Score ──`
			`if [[ "$SKIP_SCORE" == false ]]; then`
			`echo "Phase 1: Scoring codebase (model: $MODEL)..."`
			`echo "────────────────────────────────────────────"`

			`claude -p \`
			`--agent quality-scorer \`
			`--model "$MODEL" \`
			`--max-budget-usd "$BUDGET" \`
			`--allowedTools "Bash Read Write Grep Glob" \`
			`--output-format text \`
			`"$SCORE_PROMPT"`

			`REPORT=$(ls -t docs/QA-2*.md 2>/dev/null \| head -1)`
			`echo ""`
			`echo "Report: ${REPORT:-none found}"`
			`echo ""`
			`fi`

			`# ── Phase 2: Fix ──`
			`if [[ "$SKIP_FIX" == false ]]; then`
			`echo "Phase 2: Auto-fixing issues (model: $MODEL)..."`
			`echo "────────────────────────────────────────────"`

			`claude -p \`
			`--agent qa-improver \`
			`--model "$MODEL" \`
			`--max-budget-usd "$BUDGET" \`
			`--allowedTools "Bash Read Write Edit Grep Glob" \`
			`--output-format text \`
			`"$FIX_PROMPT"`

			`echo ""`

			`# ── Phase 3: Re-score ──`
			`echo "Phase 3: Re-scoring after fixes (model: $MODEL)..."`
			`echo "────────────────────────────────────────────"`

			`claude -p \`
			`--agent quality-scorer \`
			`--model "$MODEL" \`
			`--max-budget-usd "$BUDGET" \`
			`--allowedTools "Bash Read Write Grep Glob" \`
			`--output-format text \`
			`"$SCORE_PROMPT"`

			`FINAL_REPORT=$(ls -t docs/QA-2*.md 2>/dev/null \| head -1)`
			`echo ""`
			`echo "────────────────────────────────────────────"`
			`echo "Final report: ${FINAL_REPORT:-none found}"`
			`fi`