From 8c7b5fb7115b030b55eb17a0f425c276ae23b3d4 Mon Sep 17 00:00:00 2001 From: noise Date: Sat, 9 May 2026 10:18:57 -0400 Subject: [PATCH] fixed resolver --- LVX6048/bin/lvx-resolve-links | 52 +++++++++++++++++----- eg4battery/bin/eg4-battery | 25 +++++++++++ eg4battery/config/eg4-battery.yaml.example | 4 ++ 3 files changed, 70 insertions(+), 11 deletions(-) diff --git a/LVX6048/bin/lvx-resolve-links b/LVX6048/bin/lvx-resolve-links index b90aded..1f84e26 100755 --- a/LVX6048/bin/lvx-resolve-links +++ b/LVX6048/bin/lvx-resolve-links @@ -18,11 +18,18 @@ from __future__ import annotations import asyncio import glob import os +import re import sys SERIAL_UNIT_1 = "1496142109100037000000" SERIAL_UNIT_2 = "1496142408100255000000" +# Real LVX PI18 serials are long ASCII digit strings (the two known units are +# 22 digits each). Anything else — null-byte garbage, the literal "Invalid +# response …" error wrapper string, an empty payload, etc. — is a stuck-firmware +# response that must be classified as a failed probe so we'll retry it. +_SERIAL_RE = re.compile(r"^\d{18,24}$") + LINK_FOR_SERIAL = { SERIAL_UNIT_1: "/dev/lvx6048-1", SERIAL_UNIT_2: "/dev/lvx6048-2", @@ -49,7 +56,12 @@ async def probe_serial(path: str) -> str | None: return None if res is None or not getattr(res, "is_valid", False) or not res.readings: return None - return str(res.readings[0].data_value) + sn = str(res.readings[0].data_value) + # Filter out malformed responses (null-byte garbage, error-wrapper strings) + # — caller treats None as "retry this path on the next tick". + if not _SERIAL_RE.fullmatch(sn): + return None + return sn def _relink(link: str, target: str) -> None: @@ -65,27 +77,45 @@ def _relink(link: str, target: str) -> None: async def main() -> int: # Hot-plug case: udev fires this script as soon as one hidraw appears, but # a sibling inverter coming up at nearly the same moment may still be - # enumerating. 
Retry-probe up to ~10 s waiting for all expected serials, - # so a transient single-device sighting doesn't leave one symlink missing. + # enumerating. Also covers the stuck-HID-endpoint case: a unit that just + # came up may answer the first PI18 ID query with null bytes for a few + # seconds before its firmware populates the response. Retry-probe up to + # ~20 s with per-path exponential backoff (1 s → 2 → 4 → 8 cap). Backing + # off between failed probes — rather than hammering at fixed 0.5 s pacing + # — gives a confused HID endpoint time to recover instead of compounding + # the confusion. expected = set(LINK_FOR_SERIAL.keys()) - deadline = asyncio.get_event_loop().time() + 10.0 + loop_time = asyncio.get_event_loop().time + deadline = loop_time() + 20.0 sn_to_path: dict[str, str] = {} - seen_paths: set[str] = set() + # Per-path retry state. `attempts` counts probes per path (it is never + # pruned); a successful probe clears the path's `next_attempt_at`, and + # failed paths are re-probed once theirs falls due. Unknown / not-an-LVX + # paths back off to 8 s, so we don't busy-poll dead ports either. 
+ attempts: dict[str, int] = {} + next_attempt_at: dict[str, float] = {} + while True: - candidates = sorted(glob.glob("/dev/hidraw*")) - for p in candidates: - if p in seen_paths: + now = loop_time() + resolved_paths = {p for sn, p in sn_to_path.items() if sn in expected} + for p in sorted(glob.glob("/dev/hidraw*")): + if p in resolved_paths: + continue + if next_attempt_at.get(p, 0.0) > now: continue - seen_paths.add(p) sn = await probe_serial(p) + n = attempts[p] = attempts.get(p, 0) + 1 if sn: print(f"{p}: serial {sn}") sn_to_path[sn] = p + next_attempt_at.pop(p, None) else: - print(f"{p}: no PI18 response (probably not an LVX6048)") + backoff = min(2 ** (n - 1), 8) # 1, 2, 4, 8, 8, … + next_attempt_at[p] = now + backoff + print(f"{p}: no valid PI18 serial (attempt {n}, retry in {backoff}s)") if expected.issubset(sn_to_path): break - if asyncio.get_event_loop().time() >= deadline: + if loop_time() >= deadline: break await asyncio.sleep(0.5) diff --git a/eg4battery/bin/eg4-battery b/eg4battery/bin/eg4-battery index 766614e..8cc2eec 100755 --- a/eg4battery/bin/eg4-battery +++ b/eg4battery/bin/eg4-battery @@ -88,6 +88,11 @@ class MQTTConfig: username: str password: str discovery_prefix: str = "homeassistant" + # Periodically re-publish (retain=True) every discovery config we've ever + # sent, so HA recovers automatically if it loses the entity registration — + # broker restart that purged retained messages, HA missed the initial + # publish, integration glitch. Set to 0 to disable. 
+ discovery_republish_interval_s: float = 1800.0 @dataclasses.dataclass @@ -751,6 +756,7 @@ class MQTTPublisher: self._dry_run = dry_run self._client: mqtt.Client | None = None self._discovered: set[tuple[str, str]] = set() + self._last_discovery_republish_at: float = time.monotonic() if not dry_run: c = mqtt.Client(mqtt.CallbackAPIVersion.VERSION2, client_id="eg4-battery") c.username_pw_set(cfg.username, cfg.password) @@ -776,6 +782,22 @@ class MQTTPublisher: else: self._client.publish(state_topic, payload, qos=0, retain=False) + def maybe_republish_discovery(self) -> None: + """Heartbeat: re-emit every previously sent discovery config if the + configured interval has elapsed. Idempotent (retain=True), so HA + re-picks up any registrations it has lost without operator action.""" + interval = self._cfg.discovery_republish_interval_s + if interval <= 0 or not self._discovered: + return + now = time.monotonic() + if now - self._last_discovery_republish_at < interval: + return + for pack_name, key in self._discovered: + state_topic = f"{self._cfg.discovery_prefix}/sensor/{pack_name}_{key}/state" + self._publish_discovery(pack_name, key, state_topic) + self._last_discovery_republish_at = now + log.info("re-published %d discovery configs (heartbeat)", len(self._discovered)) + def _publish_discovery(self, pack_name: str, key: str, state_topic: str) -> None: unit, device_class, state_class, icon = field_meta(key) cfg = { @@ -866,6 +888,7 @@ def run_active(transport: ActiveTransport, publisher: MQTTPublisher, cfg: AppCon st.ok = False st.last_error_category = category st.consecutive_errors += 1 + publisher.maybe_republish_discovery() if one_cycle: return elapsed = time.monotonic() - cycle_start @@ -881,6 +904,7 @@ def run_passive(listener: PassiveListener, publisher: MQTTPublisher, cfg: AppCon n = 0 for frame in listener.frames(): n += 1 + publisher.maybe_republish_discovery() if trace: log.debug("%r raw=%s", frame, frame.raw.hex(" ")) if frame.kind != "response" or 
frame.function != 0x03: @@ -969,6 +993,7 @@ def run_modbus_per_pack(cfg: AppConfig, publisher: MQTTPublisher, st.ok = False st.last_error_category = category st.consecutive_errors += 1 + publisher.maybe_republish_discovery() if one_cycle: return elapsed = time.monotonic() - cycle_start diff --git a/eg4battery/config/eg4-battery.yaml.example b/eg4battery/config/eg4-battery.yaml.example index e5276b0..33e023f 100644 --- a/eg4battery/config/eg4-battery.yaml.example +++ b/eg4battery/config/eg4-battery.yaml.example @@ -33,6 +33,10 @@ mqtt: username: password: discovery_prefix: homeassistant + # Re-publish every retained discovery config every N seconds so HA recovers + # automatically if it ever loses entity registrations (broker restart, missed + # initial publish, integration glitch). 0 disables. Default: 1800 (30 min). + # discovery_republish_interval_s: 1800 # One entry per pack. `name` is the HA entity prefix and device identifier. # `address` is the EG4 7E protocol address in active mode (master = 1, slaves