Files
shaggy-solar/.claude/skills/lib/grid-cal-monitor
noise 4bfa021719 Fix POP01 encoding bug + harden grid-cal revert verification
Root cause of the grid-calibration auto-revert silently failing: lvx-control
and flash.py encode output_priority solar_battery_utility as "POP01", but PI18
POP is single-digit — the inverter silently rejects "POP01" (raw "POP1" works,
matches the POP_PIRI decoder). Compounded by powermon's adhoc queue wedging,
which dropped commands entirely until a restart. So the monitor logged "revert
done" while the cluster sat in SUB/grid mode for ~1.5h (no harm: battery full,
just running loads on grid).

- lvx-control + flash.py: POP_MAP "01" -> "1" (also patched the live
  /usr/local/bin/lvx-control + restarted; verified it now emits POP1).
- grid-cal-monitor: revert now VERIFIES via behavior (line_power_direction
  leaves 'input'), and on failure restarts powermon and re-sends raw POP1/PCP0,0,
  with a loud manual-fallback message. No more trust-the-publish.

Recovery for the live run: restarted powermon (unstuck adhoc) + raw POP1 + PCP0,0;
confirmed POP=Solar-Battery-Utility, PCP=Solar First, mode=Battery, line_dir=donothing.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-25 21:37:16 -04:00

108 lines
5.7 KiB
Bash
Executable File

#!/usr/bin/env bash
# grid-cal-monitor — supervise a grid-assisted calibration charge to full, then
# auto-revert the inverter back to normal (battery-priority, solar-only charging).
#
# Assumes the operator already set, via lvx-control:
# output_priority = solar_utility_battery (SUB: grid powers loads + charges batt)
# charger_priority = solar_and_utility
# This script ONLY monitors and then REVERTS those two (POP->solar_battery_utility,
# PCP->solar_first). It changes nothing else. Reverting is trap-guaranteed on exit.
#
# Done = all 6 packs report SoC>=99 (BMS re-anchored), or pack_V>=56.2 with combined
# current tapered <20A for 2 polls. Safety: abort+revert if any cell>3.60V or temp>45C.
# Max runtime guard then revert regardless.
# Shell options: referencing an unset variable is an error, and a pipeline
# fails when any of its stages fails. errexit is not enabled.
set -u
set -o pipefail

# Home Assistant endpoint and the file holding its API token.
HA="http://10.0.0.41:8123"
TOKEN_FILE="$HOME/.config/ha/token"

# powermon configuration (the MQTT broker settings are read from it).
POWERMON_CONF="$HOME/.config/powermon/powermon.yaml"

# Safety limits: highest cell voltage (V) and temperature (C) tolerated.
CELL_ABORT=3.60
TEMP_ABORT=45

# Per-pack SoC (%) regarded as "done" (re-anchored).
SOC_DONE=99

# Backstop completion: pack voltage (V) and combined current (A) thresholds.
VFULL=56.2
ITAPER=20

# Runtime guard (hours) and polling period (seconds).
MAX_HOURS=8
POLL_S=300

# Per-day log file, kept under the run directory (created if missing).
RUNDIR="$HOME/solar-runs"
mkdir -p "$RUNDIR"
LOG="$RUNDIR/gridcal-$(date +%Y%m%d).log"

# Set to 1 once revert() has run so it is not repeated.
REVERTED=0
# log MESSAGE... — write one timestamped line ("YYYY-MM-DD HH:MM:SS message")
# to stdout and append the same line to $LOG.
log(){
  local stamp
  stamp="$(date '+%F %T')"
  printf '%s %s\n' "$stamp" "$*" | tee -a "$LOG"
}
# Extract the MQTT broker settings from the top-level `mqttbroker:` mapping of
# the powermon YAML: name -> BHOST, port -> BPORT (1883 when absent),
# username -> BUSER, password -> BPASS. Any other top-level key ends the section.
read -r BHOST BPORT BUSER BPASS < <(
  awk '
    /^[^[:space:]]/ { in_broker = 0 }
    /^mqttbroker:/  { in_broker = 1; next }
    in_broker && /^[[:space:]]+name:/     { host = $2 }
    in_broker && /^[[:space:]]+port:/     { port = $2 }
    in_broker && /^[[:space:]]+username:/ { user = $2 }
    in_broker && /^[[:space:]]+password:/ { pass = $2 }
    END { print host, (port ? port : 1883), user, pass }
  ' "$POWERMON_CONF"
)
# Home Assistant API token, sent as a Bearer header by ha().
TOKEN="$(<"$TOKEN_FILE")"
# ha ENTITY_ID — fetch the raw state JSON of one Home Assistant entity
# (GET $HA/api/states/ENTITY_ID, authenticated with $TOKEN) and print it.
ha(){
  local entity=$1
  curl -s \
    -H "Authorization: Bearer $TOKEN" \
    "$HA/api/states/$entity"
}
# st ENTITY_ID — print the entity's "state" field as returned by ha().
# Prints an empty line when the reply is not JSON, is not an object, or has
# no "state" key.
st(){
  ha "$1" | python3 -c '
import json
import sys

try:
    print(json.load(sys.stdin).get("state", ""))
except:
    print("")
'
}
# tc ENTITY_ID — print the entity's temperature in degrees Celsius, rounded to
# one decimal. A reading whose unit_of_measurement contains "F" is converted
# from Fahrenheit; anything else is taken as already Celsius. Prints an empty
# line when the state is missing or not numeric (e.g. unknown/unavailable).
#
# The embedded Python must keep its indentation: with the try-body flush left
# the program does not even compile, so the except clause never gets a chance
# to run and nothing is printed.
tc(){
  ha "$1" | python3 -c '
import json
import sys

try:
    doc = json.load(sys.stdin)
    value = float(doc["state"])
    # Tolerate replies that carry no attributes object at all.
    unit = (doc.get("attributes") or {}).get("unit_of_measurement", "")
    if "F" in unit:
        value = (value - 32) * 5 / 9
    print(round(value, 1))
except Exception:
    print("")
'
}
# P PACK FIELD — print the Home Assistant entity id of FIELD for battery pack
# number PACK.
P(){
  local pack=$1 field=$2
  printf '%s\n' "sensor.eg4_lifepower4_lifepower4_${pack}_lifepower4_${pack}_${field}"
}
# pub SETTING VALUE — publish VALUE to the solar/control/lvx6048/SETTING topic
# on the configured MQTT broker. Returns mosquitto_pub's exit status.
pub(){
  local setting=$1 value=$2
  local -a broker=(-h "$BHOST" -p "$BPORT" -u "$BUSER" -P "$BPASS")
  mosquitto_pub "${broker[@]}" -t "solar/control/lvx6048/${setting}" -m "$value"
}
# raw COMMAND — send COMMAND verbatim to the powermon addcommand topic of both
# inverter units (powermon/lvx6048_1 and powermon/lvx6048_2).
# Both publishes are always attempted. Returns 0 only when both succeeded,
# otherwise the status of the last failing publish, so a failure on unit 1 is
# no longer hidden by a success on unit 2.
raw(){
  local unit rc=0
  for unit in 1 2; do
    mosquitto_pub -h "$BHOST" -p "$BPORT" -u "$BUSER" -P "$BPASS" \
      -t "powermon/lvx6048_${unit}/addcommand" -m "$1" || rc=$?
  done
  return "$rc"
}
# reverted_ok — succeed only when Home Assistant shows positive evidence that
# the revert took effect on inverter 1. Publishing is not trusted on its own:
# the friendly path can silently fail (lvx-control used to encode POP01, which
# the inverter rejects, and powermon's adhoc queue can wedge).
#
# Evidence, either of:
#   * line_power_direction holds a real value other than 'input'
#     (in SBU with a full bank the inverter stops drawing from the line), or
#   * output_source_priority reads back as "... Battery - Utility".
#
# Placeholder states are NOT evidence. An empty reply, Home Assistant's
# 'unknown' / 'unavailable', or 'None' (what st prints for a JSON null state)
# only mean the sensor has no data, which is likely right after powermon has
# been restarted. Treating them as "not input" would report a false success.
reverted_ok(){
  local ld pop
  ld=$(st sensor.lvx6048_lvx6048_1_line_power_direction)
  pop=$(st sensor.lvx6048_lvx6048_1_output_source_priority)
  case "$ld" in
    ''|input|unknown|unavailable|None) ;;  # still on the line, or no data
    *) return 0 ;;
  esac
  [[ "$pop" == *"Battery - Utility"* ]]
}
# revert — put the inverter back to normal operation
# (output_priority -> solar_battery_utility, charger_priority -> solar_first)
# and confirm it with reverted_ok.
#
# Flow:
#   1. No-op when REVERTED is already 1.
#   2. Friendly path: publish both settings via pub, wait, verify.
#   3. Escalation (up to 2 tries): restart the powermon services, send the raw
#      POP1 / PCP0,0 commands to both units, wait, verify.
#   4. If still unconfirmed, log a loud manual-fallback message.
# REVERTED is set to 1 on every path that gets past step 1, so the sequence is
# never repeated. Returns 0 once verified.
revert(){
  local attempt
  [ "$REVERTED" = 1 ] && return 0
  log "REVERT: output_priority->solar_battery_utility, charger_priority->solar_first"
  pub output_priority solar_battery_utility; sleep 3
  pub charger_priority solar_first; sleep 12
  if reverted_ok; then
    log "REVERT verified (line_dir=$(st sensor.lvx6048_lvx6048_1_line_power_direction))"
    REVERTED=1
    return 0
  fi
  # escalate: powermon adhoc may be wedged and/or friendly encode rejected -> restart + raw
  for attempt in 1 2; do
    log "REVERT not effective yet — restart powermon + raw POP1/PCP0,0 (try $attempt)"
    # A failed restart must be visible in the log: the raw commands below
    # cannot help while the adhoc queue is still wedged.
    if ! sudo systemctl restart powermon.service powermon2.service 2>/dev/null; then
      log "REVERT: powermon restart failed (sudo/systemctl) — sending raw commands anyway"
    fi
    sleep 12
    raw POP1; sleep 3; raw PCP0,0; sleep 15
    if reverted_ok; then
      log "REVERT verified after escalation"
      REVERTED=1
      return 0
    fi
  done
  log "REVERT: !!! COULD NOT CONFIRM — still grid-priority. Manually run: raw POP1 to both addcommand topics (POP1, not POP01)."
  REVERTED=1
}
# Guarantee the revert on every way out of the script.
#
# A bash signal trap does not terminate the script: once the handler returns,
# execution resumes after the interrupted command. So INT/TERM/HUP must call
# `exit` themselves (conventional 128+signal status); that in turn fires the
# EXIT trap, which is the single place the revert runs.
#
# The EXIT trap first ignores further INT/TERM/HUP so an impatient second
# Ctrl-C cannot cut the revert sequence short half-way.
trap 'trap "" INT TERM HUP; revert; log "exit"' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
trap 'exit 129' HUP
# read_packs — poll all 6 battery packs through Home Assistant and print one
# summary line:
#   "minSoC maxSoC maxcell maxtemp minV maxV totI ndone"
# where totI is the sum of the pack currents and ndone is the number of packs
# whose SoC is >= $SOC_DONE (99 when SOC_DONE is unset).
#
# Notes:
#   * Non-numeric sensor states (empty, unknown, unavailable, ...) are skipped
#     instead of aborting the whole summary, so one flaky sensor does not
#     blank the entire poll.
#   * maxtemp is the true maximum over every temperature probe, printed with
#     one decimal. It is not truncated, so 45.9C is not reported as 45C and
#     cannot slip under a ">45" limit.
#   * With no temperature reading at all, maxtemp is reported as 0 and a
#     warning goes to stderr.
#   * With no usable SoC, cell-voltage or pack-voltage reading, nothing is
#     printed, an explanation goes to stderr and the status is 1.
read_packs(){
  local socs=() cells=() temps=() vs=() is=() i s t
  for i in 1 2 3 4 5 6; do
    socs+=("$(st "$(P "$i" soc)")")
    cells+=("$(st "$(P "$i" cell_voltage_max)")")
    vs+=("$(st "$(P "$i" pack_voltage)")")
    is+=("$(st "$(P "$i" pack_current)")")
    for s in temperature_pcb temperature_01 temperature_02 temperature_03; do
      t="$(tc "$(P "$i" "$s")")"
      [[ "$t" =~ ^-?[0-9]+([.][0-9]+)?$ ]] && temps+=("$t")
    done
  done
  # "${temps[*]-}": temps may be empty, which older bash rejects under `set -u`.
  python3 - "${socs[*]}" "${cells[*]}" "${temps[*]-}" "${vs[*]}" "${is[*]}" "${SOC_DONE:-99}" <<'PY'
import math
import sys


def nums(arg):
    """Return the finite floats found in a whitespace-separated string."""
    out = []
    for tok in arg.split():
        try:
            val = float(tok)
        except ValueError:
            continue
        if math.isfinite(val):
            out.append(val)
    return out


soc, cell, tmp, volt, cur = map(nums, sys.argv[1:6])
done_at = float(sys.argv[6])
if not (soc and cell and volt):
    print("read_packs: no usable SoC/cell/voltage readings", file=sys.stderr)
    sys.exit(1)
if not tmp:
    print("read_packs: no temperature readings; reporting 0", file=sys.stderr)
tmax = max(tmp) if tmp else 0.0
ndone = sum(1 for s in soc if s >= done_at)
print(f"{min(soc):.0f} {max(soc):.0f} {max(cell):.3f} {tmax:.1f} "
      f"{min(volt):.2f} {max(volt):.2f} {sum(cur):.0f} {ndone}")
PY
}
# ---- main supervision loop ---------------------------------------------------
# Every POLL_S seconds: read the packs, log a status line, then in order
#   1. SAFETY  — cell voltage or temperature above its limit -> exit 2
#   2. DONE    — all 6 packs re-anchored                      -> leave loop
#   3. DONE    — backstop: at bulk voltage with tapered current on 2
#                consecutive polls                            -> leave loop
#   4. TIMEOUT — MAX_HOURS elapsed                            -> leave loop
# Every way out ends in the EXIT trap, which performs the revert.
#
# Blind polls: when read_packs delivers no usable cell/temperature figures the
# limits cannot be evaluated. Rather than silently skipping the safety check,
# such polls are logged and counted; after MAX_BLIND_POLLS in a row the run is
# aborted with exit 3, because an unsupervised grid charge is exactly what this
# script exists to prevent. Override MAX_BLIND_POLLS in the environment to
# change the tolerance.
MAX_BLIND_POLLS=${MAX_BLIND_POLLS:-3}

# is_num VALUE — true when VALUE is a plain decimal number.
is_num(){ [[ "${1-}" =~ ^-?[0-9]+([.][0-9]+)?$ ]]; }

# fcmp A OP B — floating-point comparison; OP is one of: > >= <
# Status 0 when the relation holds, 1 when it does not, 2 for an unknown OP.
# Values are passed as data (awk -v), never spliced into program text.
fcmp(){
  awk -v a="$1" -v op="$2" -v b="$3" 'BEGIN {
    a += 0; b += 0
    if (op == ">")  exit !(a >  b)
    if (op == ">=") exit !(a >= b)
    if (op == "<")  exit !(a <  b)
    exit 2
  }'
}

log "=== grid-cal-monitor start (auto-revert on full/abort/exit) ==="
START=$(date +%s); taper_hits=0; blind_polls=0
while :; do
  read -r MNS MXS MXCELL MXT MNV MXV TOTI NDONE <<<"$(read_packs)"
  el=$(( ($(date +%s)-START)/60 ))
  log "[+${el}m] SoC ${MNS}-${MXS}% packs@100=${NDONE}/6 | packV ${MNV}-${MXV} | totI ${TOTI}A | maxcell ${MXCELL}V maxtemp ${MXT}C"
  if ! is_num "$MXCELL" || ! is_num "$MXT"; then
    # BLIND: nothing to compare against the limits this round. A blind poll
    # also breaks the "2 consecutive polls" requirement of the backstop.
    blind_polls=$((blind_polls+1)); taper_hits=0
    log "!!! NO TELEMETRY (${blind_polls}/${MAX_BLIND_POLLS}) — cell/temp limits cannot be checked"
    if [ "$blind_polls" -ge "$MAX_BLIND_POLLS" ]; then
      log "!!! SAFETY ABORT: no telemetry for ${blind_polls} consecutive polls — reverting now"; exit 3
    fi
  else
    blind_polls=0
    # SAFETY
    if fcmp "$MXCELL" '>' "$CELL_ABORT" || fcmp "$MXT" '>' "$TEMP_ABORT"; then
      log "!!! SAFETY ABORT: maxcell ${MXCELL}V / maxtemp ${MXT}C — reverting now"; exit 2
    fi
    # DONE: all re-anchored
    if [ "$NDONE" = 6 ]; then log "COMPLETE: all 6 packs >=${SOC_DONE}% — re-anchored"; break; fi
    # DONE backstop: at bulk + tapered for 2 consecutive polls
    if is_num "$MXV" && is_num "$TOTI" \
       && fcmp "$MXV" '>=' "$VFULL" && fcmp "$TOTI" '<' "$ITAPER"; then
      taper_hits=$((taper_hits+1)); log " (at bulk + tapered, ${taper_hits}/2)"
      if [ "$taper_hits" -ge 2 ]; then log "COMPLETE: bulk reached + current tapered"; break; fi
    else
      taper_hits=0
    fi
  fi
  # TIMEOUT
  if [ "$el" -ge $((MAX_HOURS*60)) ]; then log "TIMEOUT ${MAX_HOURS}h — reverting at packV ${MXV}"; break; fi
  sleep "$POLL_S"
done
# Final snapshot for the log.
read -r MNS MXS _ _ _ MXV _ NDONE <<<"$(read_packs)"
log "RESULT: SoC ${MNS}-${MXS}%, packs@100=${NDONE}/6, packV up to ${MXV}"
# revert runs via the EXIT trap