Resilience pass: bulletproof recovery + FWS / fault polling + EG4 discovery refresh

This commit is contained in:
2026-04-28 18:10:00 -04:00
parent 04720c3b92
commit 7688fc1dd3
11 changed files with 267 additions and 16 deletions

View File

@@ -89,25 +89,44 @@ async def main() -> int:
break
await asyncio.sleep(0.5)
if not sn_to_path:
print("no LVX6048 devices found on /dev/hidraw*", file=sys.stderr)
return 1
missing = []
# Always-best-effort policy: exit 0 even if no expected serials were
# found, so a transient inverter blip (e.g. the unit being mid-cold-start
# at sunrise) doesn't permanently latch dependent services into a failed
# state via systemd `Requires=`. The periodic timer + powermon's own
# Restart=always handle convergence once the inverter recovers.
changed = False
for sn, link in LINK_FOR_SERIAL.items():
if sn in sn_to_path:
_relink(link, sn_to_path[sn])
print(f"symlink {link} -> {os.path.basename(sn_to_path[sn])}")
target = sn_to_path[sn]
current = os.readlink(link) if os.path.islink(link) else None
current_full = os.path.join(os.path.dirname(link), current) if current else None
if current_full != target:
_relink(link, target)
print(f"symlink {link} -> {os.path.basename(target)}")
changed = True
else:
print(f"symlink {link} -> {os.path.basename(target)} (unchanged)")
else:
missing.append((link, sn))
try:
if os.path.islink(link):
if os.path.islink(link):
try:
os.unlink(link)
except FileNotFoundError:
pass
changed = True
except FileNotFoundError:
pass
print(f"WARNING: {link} serial {sn} not found on any /dev/hidraw*")
return 0 if not missing else 2
# If symlinks actually moved (e.g. hidraw indices flipped post-power-cycle),
# bounce powermon so its open hidraw fds re-bind to the right physical unit.
# Idempotent runs (no symlink change) leave powermon alone.
if changed:
print("symlinks changed — restarting powermon services")
import subprocess
subprocess.Popen(
["/bin/systemctl", "--no-block", "restart",
"powermon.service", "powermon2.service"]
)
return 0
if __name__ == "__main__":