#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.11"
# dependencies = [
#   "lief>=0.14",
#   "capstone>=5.0",
# ]
# ///
"""
lv-disasm — refine the EG4 LP4V2 register map by static analysis of
lv_host.app's Mach-O binary.

Three layers, runnable separately or all at once:
  --l1   load-offset histogram from BmsMonitoring::allFunctionModbusAnalysis
  --l2   __DATA scan for usModbusReg* / register-base constants
  --l3   Qt widget map (lb_cell_N, etc.) cross-referenced with parser code
  --all  run all three (default)
"""
from __future__ import annotations

import argparse
import re
import sys
from collections import Counter, defaultdict
from pathlib import Path

import lief
import capstone

BIN = Path("/home/noise/solar/eg4battery/tmp/bms-tool-ref/lv_host.app/Contents/MacOS/lv_host")


def load_binary(path: Path) -> lief.MachO.Binary:
    """Parse the Mach-O file at *path* and return one binary object.

    Exits the process (via ``sys.exit``) with a message when lief returns
    ``None`` for the file. When the parse result exposes an ``at`` method
    (a fat/universal container) its first slice is returned; otherwise the
    parse result itself is returned.
    """
    fat = lief.MachO.parse(str(path))
    if fat is None:
        sys.exit(f"could not parse {path}")
    if hasattr(fat, "at"):
        # NOTE(review): slice 0 is taken unconditionally, while the
        # disassembler in this file decodes x86-64 only — confirm the first
        # slice of the target binary is the x86-64 one.
        return fat.at(0)  # FatBinary
    return fat  # already a single Binary


def find_func(binary: lief.MachO.Binary, *needles: str) -> list[lief.MachO.Symbol]:
    """Return symbols whose name contains any of the given substrings.

    Each symbol is returned at most once, in the order ``binary.symbols``
    yields them. Symbols with an empty name are skipped, as are symbols
    whose name is not a ``str`` (the substring test would raise on those).
    """
    return [
        sym
        for sym in binary.symbols
        if sym.name
        and isinstance(sym.name, str)
        and any(needle in sym.name for needle in needles)
    ]


def func_bytes(binary: lief.MachO.Binary, sym: lief.MachO.Symbol) -> tuple[int, bytes] | None:
    """Return (vaddr, code_bytes) for a function symbol.

    Walks until the next symbol on the same section or end of section.
    Returns ``None`` when no section of *binary* contains the symbol's
    address.
    """
    addr = sym.value
    section = next(
        (
            sec
            for sec in binary.sections
            if sec.virtual_address <= addr < sec.virtual_address + sec.size
        ),
        None,
    )
    if section is None:
        return None

    section_end = section.virtual_address + section.size
    # The function is assumed to end at the closest symbol that follows it
    # inside the same section; with no such symbol it runs to the section end.
    # (addr is already >= the section start, so only the upper bound and
    # "strictly after addr" need checking.)
    end = min(
        (other.value for other in binary.symbols if addr < other.value < section_end),
        default=section_end,
    )
    return addr, bytes(binary.get_content_from_virtual_address(addr, end - addr))


def disasm_x86_64(addr: int, code: bytes) -> list[tuple[int, str, str]]:
    """Disassemble *code* as x86-64 starting at virtual address *addr*.

    Returns one ``(address, mnemonic, op_str)`` tuple per decoded
    instruction. Capstone's detail mode is left off because only those
    three fields are used.

    NOTE(review): without capstone's skipdata option, decoding is expected
    to stop at the first byte sequence it cannot decode, so the list may
    cover less than the whole of *code* — confirm against capstone docs.
    """
    md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
    return [(insn.address, insn.mnemonic, insn.op_str) for insn in md.disasm(code, addr)]


# ============================================================================
# === Layer 1: load-offset histogram =========================================
# ============================================================================

# Capture u16-load patterns. We care about every read of a 2-byte word from
# a memory operand with displacement — especially `movzx eax, word ptr [reg + N]`.
_LOAD_PATTERNS = [ # mov ax, word ptr [reg + imm] or movzx eax, word ptr [reg + imm] re.compile(r"word ptr \[\w+ ([+\-] 0x[0-9a-fA-F]+|[+\-] \d+)\]"), # mov ax, word ptr [reg + reg2 + imm] re.compile(r"word ptr \[\w+ \+ \w+ ([+\-] 0x[0-9a-fA-F]+|[+\-] \d+)\]"), ] def layer1(binary: lief.MachO.Binary) -> None: print("=" * 72) print("LAYER 1 — load-offset histogram in *ModbusAnalysis* functions") print("=" * 72) targets = find_func(binary, "ModbusAnalysis", "allFunctionModbus") if not targets: print(" no Modbus-analysis symbols found") return for sym in targets: result = func_bytes(binary, sym) if result is None: print(f"\n {sym.name}: no code bytes") continue addr, code = result print(f"\n function: {sym.name}") print(f" vaddr: 0x{addr:08x}") print(f" bytes: {len(code)}") if not code: continue instrs = disasm_x86_64(addr, code) if not instrs: print(" (capstone returned no instructions)") continue # Count word-pointer loads — these read register values from the response buffer offsets: Counter[int] = Counter() for ia, mnem, ops in instrs: if "word ptr" not in ops: continue # extract displacement m = re.search(r"\[[^\]]+([+\-]) (?:0x([0-9a-fA-F]+)|(\d+))\]", ops) if m: sign = -1 if m.group(1) == "-" else 1 imm = int(m.group(2), 16) if m.group(2) else int(m.group(3)) offsets[sign * imm] += 1 print(f" instructions: {len(instrs)}") print(f" distinct word-ptr offsets observed: {len(offsets)}") if offsets: # print all positive offsets sorted print(" offset hex reg# (offset/2 if data starts at 0) count") for off, cnt in sorted(offsets.items()): if off < 0 or off > 200: continue reg_hint = f"reg {off // 2}" if off % 2 == 0 else "(odd offset)" print(f" {off:>6} 0x{off:04x} {reg_hint:<35} {cnt}") # ============================================================================ # === Layer 2: data-section scan for register-base constants ================= # ============================================================================ def layer2(binary: lief.MachO.Binary) -> None: 
print() print("=" * 72) print("LAYER 2 — globals: usModbusReg* and friends") print("=" * 72) needles = ["usModbus", "RegBase", "regBase"] syms = find_func(binary, *needles) if not syms: print(" no Modbus register-base symbols found") return for sym in syms: addr = sym.value # try to read up to 64 bytes (32 u16 entries) starting at this symbol try: data = bytes(binary.get_content_from_virtual_address(addr, 64)) except Exception as e: print(f" {sym.name} @ 0x{addr:x}: cannot read ({e})") continue # interpret as u16 little-endian u16s = [int.from_bytes(data[i:i + 2], "little") for i in range(0, len(data) - 1, 2)] # find the trailing run of zeros and trim while u16s and u16s[-1] == 0: u16s.pop() print(f"\n {sym.name} @ 0x{addr:08x}") print(f" size : {len(data)} bytes") print(f" u16[]: {[f'0x{v:04x}' for v in u16s[:24]]}") # ============================================================================ # === Layer 3: Qt widget names + cross-reference ============================= # ============================================================================ def layer3(binary: lief.MachO.Binary) -> None: print() print("=" * 72) print("LAYER 3 — Qt widget names + cross-reference with parser code") print("=" * 72) # Pull the binary's full string table (cstrings section) text_strs: list[str] = [] cstrings = next((s for s in binary.sections if s.name == "__cstring"), None) if cstrings is None: print(" no __cstring section?") return raw = bytes(cstrings.content) # extract null-terminated ASCII strings cur = bytearray() for b in raw: if 32 <= b < 127: cur.append(b) else: if len(cur) >= 4: text_strs.append(cur.decode("ascii")) cur = bytearray() if cur: if len(cur) >= 4: text_strs.append(cur.decode("ascii")) # Identify candidate widget object names — Qt's setObjectName values typically # follow snake_case_with_index patterns widgets: dict[str, list[str]] = defaultdict(list) pattern_groups = { "cell_N": re.compile(r"^lb_cell_\d+$|^cell_\d+$"), "warning_N": 
re.compile(r"^warning_\d+$"), "protection_N": re.compile(r"^protection_\d+$"), "error_N": re.compile(r"^error_\d+$"), "temp_N": re.compile(r"^temp_\d+$|^Temp0\d+$"), "named_field_label": re.compile( r"^(model|com_state|serial_num|ver|cell_num|capacity|" r"voltage|current|temperature|soc|soh|cycle_count)$", re.I, ), } for s in text_strs: for kind, rx in pattern_groups.items(): if rx.match(s): widgets[kind].append(s) break for kind, names in widgets.items(): print(f"\n {kind:<22} ({len(names)} found):") for n in sorted(set(names)): print(f" {n}") # also list all object names that look like Qt widget identifiers qt_widget = re.compile(r"^(lb_|cb_|le_|pb_|sb_|btn_|gridLayout|horizontalLayout|verticalLayout)") qt_names = sorted({s for s in text_strs if qt_widget.match(s)}) print(f"\n Other Qt-widget-like names ({len(qt_names)}):") for n in qt_names[:30]: print(f" {n}") if len(qt_names) > 30: print(f" ... and {len(qt_names) - 30} more") # ============================================================================ # === main =================================================================== # ============================================================================ def main() -> int: ap = argparse.ArgumentParser() ap.add_argument("--bin", default=str(BIN), help="path to lv_host Mach-O") ap.add_argument("--l1", action="store_true") ap.add_argument("--l2", action="store_true") ap.add_argument("--l3", action="store_true") ap.add_argument("--all", action="store_true") args = ap.parse_args() if not (args.l1 or args.l2 or args.l3 or args.all): args.all = True print(f"Binary: {args.bin}") binary = load_binary(Path(args.bin)) print(f"Loaded: {len(binary.symbols)} symbols, {len(binary.sections)} sections") if args.l1 or args.all: layer1(binary) if args.l2 or args.all: layer2(binary) if args.l3 or args.all: layer3(binary) return 0 if __name__ == "__main__": sys.exit(main())