282 lines
10 KiB
Plaintext
Executable File
282 lines
10 KiB
Plaintext
Executable File
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.11"
# dependencies = [
#   "lief>=0.14",
#   "capstone>=5.0",
# ]
# ///
|
|
"""
lv-disasm — refine the EG4 LP4V2 register map by static analysis of
lv_host.app's Mach-O binary.

Three layers, runnable separately or all at once:
  --l1   load-offset histogram from BmsMonitoring::allFunctionModbusAnalysis
  --l2   __DATA scan for usModbusReg* / register-base constants
  --l3   Qt widget map (lb_cell_N, etc.) cross-referenced with parser code

  --all  run all three (default)
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import re
|
|
import sys
|
|
from collections import Counter, defaultdict
|
|
from pathlib import Path
|
|
|
|
import lief
|
|
import capstone
|
|
|
|
# Default location of the lv_host Mach-O executable; main() uses this path as
# the default value of the --bin option.
BIN = Path("/home/noise/solar/eg4battery/tmp/bms-tool-ref/lv_host.app/Contents/MacOS/lv_host")
|
|
|
|
|
|
def load_binary(path: Path) -> lief.MachO.Binary:
    """Parse the Mach-O file at *path* and return a single binary.

    Exits the process with an error message when lief cannot parse the file.
    When the parse result exposes an ``at`` method (a fat container), the
    entry at index 0 is returned; otherwise the parse result itself is
    returned unchanged.
    """
    parsed = lief.MachO.parse(str(path))
    if parsed is None:
        sys.exit(f"could not parse {path}")
    # A fat container offers indexed access to its slices; anything without
    # ``at`` is treated as an already-single binary.
    is_fat = hasattr(parsed, "at")
    return parsed.at(0) if is_fat else parsed
|
|
|
|
|
|
def find_func(binary: lief.MachO.Binary, *needles: str) -> list[lief.MachO.Symbol]:
    """Collect the symbols whose name contains at least one of *needles*.

    Symbols with an empty name are skipped. Matches keep the order in which
    ``binary.symbols`` yields them, and a symbol that matches several needles
    is still reported only once.
    """
    return [
        symbol
        for symbol in binary.symbols
        if symbol.name and any(needle in symbol.name for needle in needles)
    ]
|
|
|
|
|
|
def func_bytes(binary: lief.MachO.Binary, sym: lief.MachO.Symbol) -> tuple[int, bytes] | None:
    """Return ``(vaddr, code_bytes)`` for the function that starts at *sym*.

    The function is taken to extend up to the next symbol with a higher
    address inside the same section, or to the end of that section when no
    such symbol exists.

    Returns ``None`` when no section of *binary* contains ``sym.value``.
    """
    start = sym.value
    section = next(
        (
            s
            for s in binary.sections
            if s.virtual_address <= start < s.virtual_address + s.size
        ),
        None,
    )
    if section is None:
        return None

    section_end = section.virtual_address + section.size
    # Only the nearest following symbol matters, so take the minimum rather
    # than sorting every candidate. `start` already lies inside the section,
    # so "value > start" implies the section's lower bound.
    end = min(
        (s.value for s in binary.symbols if start < s.value < section_end),
        default=section_end,
    )
    return start, bytes(binary.get_content_from_virtual_address(start, end - start))
|
|
|
|
|
|
def disasm_x86_64(addr: int, code: bytes) -> list[tuple[int, str, str]]:
    """Disassemble *code* as x86-64 machine code located at *addr*.

    Returns one ``(address, mnemonic, op_str)`` tuple per instruction, in the
    order capstone yields them.
    """
    engine = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
    engine.detail = True
    decoded: list[tuple[int, str, str]] = []
    for insn in engine.disasm(code, addr):
        decoded.append((insn.address, insn.mnemonic, insn.op_str))
    return decoded
|
|
|
|
|
|
# ============================================================================
|
|
# === Layer 1: load-offset histogram =========================================
|
|
# ============================================================================
|
|
|
|
|
|
# Regexes for 2-byte ("word ptr") memory operands that carry an explicit
# displacement — especially `movzx eax, word ptr [reg + N]`. Group 1 of each
# pattern is the signed displacement text, written in hex or decimal.
_LOAD_PATTERNS = [
    re.compile(operand_rx)
    for operand_rx in (
        # mov ax, word ptr [reg + imm]  or  movzx eax, word ptr [reg + imm]
        r"word ptr \[\w+ ([+\-] 0x[0-9a-fA-F]+|[+\-] \d+)\]",
        # mov ax, word ptr [reg + reg2 + imm]
        r"word ptr \[\w+ \+ \w+ ([+\-] 0x[0-9a-fA-F]+|[+\-] \d+)\]",
    )
]
|
|
|
|
|
|
# Intel-syntax size prefix of an operand that is exactly 16 bits wide. The
# lookbehind rejects "dword ptr", "qword ptr" and "xmmword ptr", which all
# contain the substring "word ptr" and would otherwise be counted as u16
# accesses.
_WORD_PTR_RX = re.compile(r"(?<![A-Za-z])word ptr")

# Trailing signed displacement of a bracketed memory operand (hex or decimal).
_DISPLACEMENT_RX = re.compile(r"\[[^\]]+([+\-]) (?:0x([0-9a-fA-F]+)|(\d+))\]")


def _word_ptr_offsets(instrs: list[tuple[int, str, str]]) -> Counter[int]:
    """Count the signed displacements of 16-bit memory operands in *instrs*."""
    offsets: Counter[int] = Counter()
    for _addr, _mnemonic, ops in instrs:
        if not _WORD_PTR_RX.search(ops):
            continue
        found = _DISPLACEMENT_RX.search(ops)
        if found is None:
            # 16-bit operand without an explicit displacement, e.g. [rax]
            continue
        sign, hex_digits, dec_digits = found.groups()
        magnitude = int(hex_digits, 16) if hex_digits else int(dec_digits)
        offsets[-magnitude if sign == "-" else magnitude] += 1
    return offsets


def layer1(binary: lief.MachO.Binary) -> None:
    """Print a per-function histogram of 16-bit memory-operand displacements.

    Every symbol whose name contains "ModbusAnalysis" or "allFunctionModbus"
    is disassembled as x86-64. For each one, the displacement of every
    instruction with a ``word ptr`` memory operand is counted; the mnemonic is
    not inspected, so stores are counted along with loads. The table printed
    for a function lists only offsets in the range 0..200, while the
    "distinct ... offsets" line reports all of them.
    """
    print("=" * 72)
    print("LAYER 1 — load-offset histogram in *ModbusAnalysis* functions")
    print("=" * 72)

    targets = find_func(binary, "ModbusAnalysis", "allFunctionModbus")
    if not targets:
        print(" no Modbus-analysis symbols found")
        return

    for sym in targets:
        result = func_bytes(binary, sym)
        if result is None:
            print(f"\n {sym.name}: no code bytes")
            continue
        addr, code = result
        print(f"\n function: {sym.name}")
        print(f" vaddr: 0x{addr:08x}")
        print(f" bytes: {len(code)}")
        if not code:
            continue
        instrs = disasm_x86_64(addr, code)
        if not instrs:
            print(" (capstone returned no instructions)")
            continue

        offsets = _word_ptr_offsets(instrs)
        print(f" instructions: {len(instrs)}")
        print(f" distinct word-ptr offsets observed: {len(offsets)}")
        if not offsets:
            continue

        # List the non-negative offsets up to 200 in ascending order; negative
        # and larger displacements are left out of the table.
        print(" offset hex reg# (offset/2 if data starts at 0) count")
        for off, cnt in sorted(offsets.items()):
            if off < 0 or off > 200:
                continue
            reg_hint = f"reg {off // 2}" if off % 2 == 0 else "(odd offset)"
            print(f" {off:>6} 0x{off:04x} {reg_hint:<35} {cnt}")
|
|
|
|
|
|
# ============================================================================
|
|
# === Layer 2: data-section scan for register-base constants =================
|
|
# ============================================================================
|
|
|
|
|
|
def layer2(binary: lief.MachO.Binary) -> None:
    """Print the data found at each Modbus register-base global as u16 values.

    Symbols whose name contains "usModbus", "RegBase" or "regBase" are looked
    up. For each one, 64 bytes are requested at the symbol's address, decoded
    as little-endian 16-bit words, stripped of trailing zero words, and the
    first 24 remaining words are printed in hex. A read that raises is
    reported and skipped.
    """
    print()
    print("=" * 72)
    print("LAYER 2 — globals: usModbusReg* and friends")
    print("=" * 72)

    matches = find_func(binary, "usModbus", "RegBase", "regBase")
    if not matches:
        print(" no Modbus register-base symbols found")
        return

    for symbol in matches:
        vaddr = symbol.value
        # request a 64-byte window (32 u16 entries) beginning at the symbol
        try:
            blob = bytes(binary.get_content_from_virtual_address(vaddr, 64))
        except Exception as exc:
            print(f" {symbol.name} @ 0x{vaddr:x}: cannot read ({exc})")
            continue

        # pair up low/high bytes -> little-endian u16; an odd final byte is dropped
        words = [lo | (hi << 8) for lo, hi in zip(blob[0::2], blob[1::2])]

        # cut off the run of zero words at the end
        keep = len(words)
        while keep and words[keep - 1] == 0:
            keep -= 1
        words = words[:keep]

        print(f"\n {symbol.name} @ 0x{vaddr:08x}")
        print(f" size : {len(blob)} bytes")
        print(f" u16[]: {[f'0x{w:04x}' for w in words[:24]]}")
|
|
|
|
|
|
# ============================================================================
|
|
# === Layer 3: Qt widget names + cross-reference =============================
|
|
# ============================================================================
|
|
|
|
|
|
# Shortest printable run kept as a string. The named-field pattern below lists
# three-character names ("ver", "soc", "soh"); any threshold above 3 would
# discard them before they could ever be matched.
_MIN_CSTRING_LEN = 3

# A maximal run of printable ASCII bytes (0x20..0x7e) of at least that length.
_PRINTABLE_RUN_RX = re.compile(rb"[\x20-\x7e]{%d,}" % _MIN_CSTRING_LEN)

# Candidate widget object names, grouped by kind. A string is filed under the
# first kind whose pattern matches it.
_WIDGET_PATTERNS = {
    "cell_N": re.compile(r"^lb_cell_\d+$|^cell_\d+$"),
    "warning_N": re.compile(r"^warning_\d+$"),
    "protection_N": re.compile(r"^protection_\d+$"),
    "error_N": re.compile(r"^error_\d+$"),
    "temp_N": re.compile(r"^temp_\d+$|^Temp0\d+$"),
    "named_field_label": re.compile(
        r"^(model|com_state|serial_num|ver|cell_num|capacity|"
        r"voltage|current|temperature|soc|soh|cycle_count)$",
        re.I,
    ),
}

# Prefixes that make a string look like a Qt widget / layout identifier.
_QT_WIDGET_RX = re.compile(r"^(lb_|cb_|le_|pb_|sb_|btn_|gridLayout|horizontalLayout|verticalLayout)")

# Number of Qt-widget-like names listed before the output is abbreviated.
_QT_NAME_LIMIT = 30


def _printable_strings(raw: bytes) -> list[str]:
    """Return each printable-ASCII run in *raw* of at least _MIN_CSTRING_LEN bytes."""
    return [run.decode("ascii") for run in _PRINTABLE_RUN_RX.findall(raw)]


def layer3(binary: lief.MachO.Binary) -> None:
    """Print widget-name-like strings found in the ``__cstring`` section.

    The section's bytes are split into runs of printable ASCII; any other byte
    ends a run. Each string is tested against the widget-name patterns and
    filed under the first kind that matches, and every kind with at least one
    hit is printed with its distinct names in sorted order. Finally the
    distinct strings that start with a Qt-style prefix are listed, up to 30 of
    them, followed by a count of the remainder.

    Prints a notice and returns when the binary has no ``__cstring`` section.
    """
    print()
    print("=" * 72)
    print("LAYER 3 — Qt widget names + cross-reference with parser code")
    print("=" * 72)

    cstrings = next((s for s in binary.sections if s.name == "__cstring"), None)
    if cstrings is None:
        print(" no __cstring section?")
        return
    text_strs = _printable_strings(bytes(cstrings.content))

    widgets: dict[str, list[str]] = defaultdict(list)
    for s in text_strs:
        for kind, rx in _WIDGET_PATTERNS.items():
            if rx.match(s):
                widgets[kind].append(s)
                break

    for kind, names in widgets.items():
        print(f"\n {kind:<22} ({len(names)} found):")
        for n in sorted(set(names)):
            print(f" {n}")

    # also list all object names that look like Qt widget identifiers
    qt_names = sorted({s for s in text_strs if _QT_WIDGET_RX.match(s)})
    print(f"\n Other Qt-widget-like names ({len(qt_names)}):")
    for n in qt_names[:_QT_NAME_LIMIT]:
        print(f" {n}")
    if len(qt_names) > _QT_NAME_LIMIT:
        print(f" ... and {len(qt_names) - _QT_NAME_LIMIT} more")
|
|
|
|
|
|
# ============================================================================
|
|
# === main ===================================================================
|
|
# ============================================================================
|
|
|
|
|
|
def main() -> int:
    """Parse the command line, load the binary and run the selected layers.

    ``--l1``, ``--l2`` and ``--l3`` each select one layer; ``--all``, or giving
    none of the four flags, selects all three. Layers always run in the order
    1, 2, 3. Returns 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--bin", default=str(BIN), help="path to lv_host Mach-O")
    for flag in ("--l1", "--l2", "--l3", "--all"):
        parser.add_argument(flag, action="store_true")
    args = parser.parse_args()

    # No explicit layer selection means "run everything".
    nothing_selected = not (args.l1 or args.l2 or args.l3 or args.all)
    run_everything = args.all or nothing_selected

    print(f"Binary: {args.bin}")
    binary = load_binary(Path(args.bin))
    print(f"Loaded: {len(binary.symbols)} symbols, {len(binary.sections)} sections")

    stages = (
        (args.l1, layer1),
        (args.l2, layer2),
        (args.l3, layer3),
    )
    for requested, run_layer in stages:
        if requested or run_everything:
            run_layer(binary)
    return 0
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|