Files
shaggy-solar/eg4battery/tmp/lv-disasm
2026-04-25 19:00:44 -04:00

282 lines
10 KiB
Plaintext
Executable File

#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "lief>=0.14",
# "capstone>=5.0",
# ]
# ///
"""
lv-disasm — refine the EG4 LP4V2 register map by static analysis of
lv_host.app's Mach-O binary.
Three layers, runnable separately or all at once:
--l1 load-offset histogram from BmsMonitoring::allFunctionModbusAnalysis
--l2 __DATA scan for usModbusReg* / register-base constants
--l3 Qt widget map (lb_cell_N, etc.) cross-referenced with parser code
--all run all three (default)
"""
from __future__ import annotations
import argparse
import re
import sys
from collections import Counter, defaultdict
from pathlib import Path
import lief
import capstone
# Default location of the lv_host Mach-O executable; main() uses it as the
# default value of the --bin option.
BIN = Path("/home/noise/solar/eg4battery/tmp/bms-tool-ref/lv_host.app/Contents/MacOS/lv_host")
def load_binary(path: Path) -> lief.MachO.Binary:
    """Parse the Mach-O file at *path* and return a single binary object.

    Exits the process with an error message when LIEF returns ``None``.
    When the parse result exposes an ``at`` method (a fat/universal
    container), slice 0 is returned; otherwise the result is returned
    unchanged.
    """
    parsed = lief.MachO.parse(str(path))
    if parsed is None:
        sys.exit(f"could not parse {path}")
    # NOTE(review): slice 0 is taken unconditionally while disasm_x86_64
    # decodes everything as x86-64 — confirm slice 0 is the x86_64 slice
    # when the input is a universal binary.
    return parsed.at(0) if hasattr(parsed, "at") else parsed
def find_func(binary: lief.MachO.Binary, *needles: str) -> list[lief.MachO.Symbol]:
    """Collect every symbol whose name contains at least one of *needles*.

    Symbols with an empty name are skipped.  The result keeps the order of
    ``binary.symbols`` and lists each matching symbol once, no matter how
    many needles it matches.
    """
    return [
        symbol
        for symbol in binary.symbols
        if symbol.name and any(needle in symbol.name for needle in needles)
    ]
def func_bytes(binary: lief.MachO.Binary, sym: lief.MachO.Symbol) -> tuple[int, bytes] | None:
    """Return ``(vaddr, code_bytes)`` for the function that starts at *sym*.

    The function is taken to run up to the next symbol with a higher
    address inside the same section, or to the end of that section when no
    such symbol exists.

    Returns ``None`` when no section of *binary* contains ``sym.value``.
    """
    start = sym.value
    section = next(
        (
            s
            for s in binary.sections
            if s.virtual_address <= start < s.virtual_address + s.size
        ),
        None,
    )
    if section is None:
        return None

    section_end = section.virtual_address + section.size
    # `start` already lies inside the section, so "value > start" implies the
    # lower section bound; only the upper bound needs an explicit check.
    # min() replaces a full sort of the symbol table: only the nearest
    # following symbol matters.
    end = min(
        (s.value for s in binary.symbols if start < s.value < section_end),
        default=section_end,
    )
    return start, bytes(binary.get_content_from_virtual_address(start, end - start))
def disasm_x86_64(addr: int, code: bytes) -> list[tuple[int, str, str]]:
    """Disassemble *code* as 64-bit x86, using *addr* as its start address.

    Returns one ``(address, mnemonic, op_str)`` tuple per instruction, in
    the order capstone yields them.
    """
    engine = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
    engine.detail = True
    decoded: list[tuple[int, str, str]] = []
    for insn in engine.disasm(code, addr):
        decoded.append((insn.address, insn.mnemonic, insn.op_str))
    return decoded
# ============================================================================
# === Layer 1: load-offset histogram =========================================
# ============================================================================
# Capture u16 memory-operand patterns in Intel-syntax operand strings — we
# care about every access to a 2-byte word through a memory operand with a
# displacement, especially `movzx eax, word ptr [reg + N]`.
# Group 1 of each pattern is the signed displacement text ("+ 0x10", "- 4").
# The `(?<![A-Za-z])` lookbehind stops "dword ptr", "qword ptr",
# "xmmword ptr", ... from matching on their trailing "word ptr".
# NOTE(review): nothing visible in this file reads this list — confirm
# whether it is still needed.
_LOAD_PATTERNS = [
    # word ptr [reg + imm]
    re.compile(r"(?<![A-Za-z])word ptr \[\w+ ([+\-] 0x[0-9a-fA-F]+|[+\-] \d+)\]"),
    # word ptr [reg + index + imm]; the index may carry a scale ("rcx*2")
    re.compile(
        r"(?<![A-Za-z])word ptr \[\w+ \+ \w+(?:\*\d)? ([+\-] 0x[0-9a-fA-F]+|[+\-] \d+)\]"
    ),
]
# Displacement of a 16-bit memory operand in an Intel-syntax operand string,
# e.g. "word ptr [rdi + 0x10]" or "word ptr [rax + rcx*2 - 4]".
# Groups: 1 = sign, 2 = hex digits (when written 0x..), 3 = decimal digits.
# The lookbehind rejects "dword ptr" / "qword ptr" / "xmmword ptr", which all
# contain the substring "word ptr" and would otherwise pollute the histogram.
_WORD_PTR_DISP_RE = re.compile(
    r"(?<![A-Za-z])word ptr (?:\w+:)?\[[^\]]+?([+\-]) (?:0x([0-9a-fA-F]+)|(\d+))\]"
)


def _word_ptr_offsets(instrs: list[tuple[int, str, str]]) -> Counter[int]:
    """Tally the signed displacement of the 16-bit memory operand per instruction."""
    tally: Counter[int] = Counter()
    for _addr, _mnemonic, ops in instrs:
        hit = _WORD_PTR_DISP_RE.search(ops)
        if hit is None:
            continue
        sign, hex_digits, dec_digits = hit.groups()
        magnitude = int(hex_digits, 16) if hex_digits else int(dec_digits)
        tally[-magnitude if sign == "-" else magnitude] += 1
    return tally


def layer1(binary: lief.MachO.Binary) -> None:
    """Print a histogram of 16-bit memory-operand displacements.

    Every symbol whose name contains "ModbusAnalysis" or "allFunctionModbus"
    is disassembled as x86-64, and the displacements of its ``word ptr``
    operands are counted.  Only genuine 16-bit operands are counted;
    ``dword``/``qword``/``xmmword`` operands are ignored.

    All output goes to stdout; nothing is returned.
    """
    print("=" * 72)
    print("LAYER 1 — load-offset histogram in *ModbusAnalysis* functions")
    print("=" * 72)
    targets = find_func(binary, "ModbusAnalysis", "allFunctionModbus")
    if not targets:
        print(" no Modbus-analysis symbols found")
        return
    for sym in targets:
        result = func_bytes(binary, sym)
        if result is None:
            print(f"\n {sym.name}: no code bytes")
            continue
        addr, code = result
        print(f"\n function: {sym.name}")
        print(f" vaddr: 0x{addr:08x}")
        print(f" bytes: {len(code)}")
        if not code:
            continue
        instrs = disasm_x86_64(addr, code)
        if not instrs:
            print(" (capstone returned no instructions)")
            continue
        # NOTE(review): stores through a word ptr operand are tallied as well
        # as loads — confirm that is acceptable for the histogram.
        offsets = _word_ptr_offsets(instrs)
        print(f" instructions: {len(instrs)}")
        print(f" distinct word-ptr offsets observed: {len(offsets)}")
        if offsets:
            print(" offset hex reg# (offset/2 if data starts at 0) count")
            for off, cnt in sorted(offsets.items()):
                # Only offsets in 0..200 are listed; anything outside is
                # still included in the "distinct" total printed above.
                if off < 0 or off > 200:
                    continue
                reg_hint = f"reg {off // 2}" if off % 2 == 0 else "(odd offset)"
                print(f" {off:>6} 0x{off:04x} {reg_hint:<35} {cnt}")
# ============================================================================
# === Layer 2: data-section scan for register-base constants =================
# ============================================================================
def layer2(binary: lief.MachO.Binary) -> None:
    """Dump the first bytes at each register-base-looking symbol as u16 words.

    Symbols whose name contains "usModbus", "RegBase" or "regBase" are
    looked up; for each one up to 64 bytes are read at its address,
    decoded as little-endian 16-bit words, stripped of trailing zero words
    and printed (at most 24 words per symbol).  Output goes to stdout.
    """
    print()
    print("=" * 72)
    print("LAYER 2 — globals: usModbusReg* and friends")
    print("=" * 72)

    matches = find_func(binary, "usModbus", "RegBase", "regBase")
    if not matches:
        print(" no Modbus register-base symbols found")
        return

    for symbol in matches:
        vaddr = symbol.value
        # 64 bytes == up to 32 u16 entries starting at the symbol
        try:
            blob = bytes(binary.get_content_from_virtual_address(vaddr, 64))
        except Exception as err:
            print(f" {symbol.name} @ 0x{vaddr:x}: cannot read ({err})")
            continue

        # little-endian u16 view; a dangling odd byte at the end is ignored
        words = [
            int.from_bytes(blob[pos:pos + 2], "little")
            for pos in range(0, len(blob) - 1, 2)
        ]
        # drop the trailing run of zero words
        keep = len(words)
        while keep and words[keep - 1] == 0:
            keep -= 1
        rendered = [f"0x{word:04x}" for word in words[:keep][:24]]

        print(f"\n {symbol.name} @ 0x{vaddr:08x}")
        print(f" size : {len(blob)} bytes")
        print(f" u16[]: {rendered}")
# ============================================================================
# === Layer 3: Qt widget names + cross-reference =============================
# ============================================================================
def layer3(binary: lief.MachO.Binary) -> None:
    """Print widget-like identifiers found in the ``__cstring`` section.

    Printable-ASCII strings of at least four characters are pulled from
    the first section named ``__cstring``.  Each string is filed under the
    first name pattern it matches and the groups are printed; then every
    string starting with a common Qt widget/layout prefix is listed
    (first 30 only, with a count of the remainder).  Output goes to stdout.
    """
    print()
    print("=" * 72)
    print("LAYER 3 — Qt widget names + cross-reference with parser code")
    print("=" * 72)

    section = None
    for candidate in binary.sections:
        if candidate.name == "__cstring":
            section = candidate
            break
    if section is None:
        print(" no __cstring section?")
        return

    # Maximal runs of printable ASCII (0x20-0x7e) that are >= 4 bytes long;
    # any other byte (NUL included) terminates a run.
    strings = [
        run.decode("ascii")
        for run in re.findall(rb"[\x20-\x7e]{4,}", bytes(section.content))
    ]

    # Candidate object names, tried in this order; first match wins.
    classifiers = (
        ("cell_N", re.compile(r"^lb_cell_\d+$|^cell_\d+$")),
        ("warning_N", re.compile(r"^warning_\d+$")),
        ("protection_N", re.compile(r"^protection_\d+$")),
        ("error_N", re.compile(r"^error_\d+$")),
        ("temp_N", re.compile(r"^temp_\d+$|^Temp0\d+$")),
        (
            "named_field_label",
            re.compile(
                r"^(model|com_state|serial_num|ver|cell_num|capacity|"
                r"voltage|current|temperature|soc|soh|cycle_count)$",
                re.I,
            ),
        ),
    )
    grouped: dict[str, list[str]] = defaultdict(list)
    for text in strings:
        kind = next((label for label, rx in classifiers if rx.match(text)), None)
        if kind is not None:
            grouped[kind].append(text)

    for kind, members in grouped.items():
        print(f"\n {kind:<22} ({len(members)} found):")
        for name in sorted(set(members)):
            print(f" {name}")

    # Everything that starts with a typical Qt widget/layout prefix.
    prefix_rx = re.compile(r"^(lb_|cb_|le_|pb_|sb_|btn_|gridLayout|horizontalLayout|verticalLayout)")
    qt_like = sorted({text for text in strings if prefix_rx.match(text)})
    print(f"\n Other Qt-widget-like names ({len(qt_like)}):")
    for name in qt_like[:30]:
        print(f" {name}")
    overflow = len(qt_like) - 30
    if overflow > 0:
        print(f" ... and {overflow} more")
# ============================================================================
# === main ===================================================================
# ============================================================================
def main() -> int:
    """Parse the command line, load the binary and run the chosen layers.

    ``--l1``/``--l2``/``--l3`` select individual layers and ``--all``
    selects every layer; with no selection flag at all, every layer runs.
    Layers always execute in the order 1, 2, 3.  Returns 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--bin", default=str(BIN), help="path to lv_host Mach-O")
    for flag in ("--l1", "--l2", "--l3", "--all"):
        parser.add_argument(flag, action="store_true")
    opts = parser.parse_args()

    # No explicit selection behaves exactly like --all.
    run_all = opts.all or not (opts.l1 or opts.l2 or opts.l3)

    print(f"Binary: {opts.bin}")
    binary = load_binary(Path(opts.bin))
    print(f"Loaded: {len(binary.symbols)} symbols, {len(binary.sections)} sections")

    for selected, layer in ((opts.l1, layer1), (opts.l2, layer2), (opts.l3, layer3)):
        if run_all or selected:
            layer(binary)
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())