Initial commit: localgenai stack
Containerized local LLM stack for the Framework Desktop / Strix Halo,
plus the OpenCode harness on the Mac side.
- pyinfra/framework/: pyinfra deploy targeting the box
- llama.cpp (Vulkan), vLLM (ROCm), Ollama (ROCm with HSA override
for gfx1151), OpenWebUI
- Beszel (host + container + AMD GPU dashboard via sysfs)
- OpenLIT (LLM fleet metrics)
- Phoenix (per-trace agent waterfall)
- OpenHands (autonomous agent in a Docker sandbox)
- opencode/: OpenCode config + Phoenix bridge plugin (OTel exporter)
- install.sh deploys to ~/.config/opencode/
- StrixHaloSetup.md / StrixHaloMemory.md / Roadmap.md / TODO.md:
documentation and planning
- testing/qwen3-coder-30b/: small evaluation harness
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
70
testing/qwen3-coder-30b/example_usage.py
Executable file
70
testing/qwen3-coder-30b/example_usage.py
Executable file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Example usage of the LLM Test Framework with real models
|
||||
"""
|
||||
|
||||
import time
|
||||
import json
|
||||
from pathlib import Path
|
||||
from llm_test_framework import TestSuite, MockLLM
|
||||
|
||||
# Example tasks for testing.
# Each task is a dict with a display "name", the "prompt" text, an
# "expected_output" snippet, and per-task "parameters" (here only a
# sampling temperature).
# NOTE(review): how "expected_output" is compared against a model response
# is decided by llm_test_framework, which is not visible here — confirm.
SAMPLE_TASKS = [
    {
        "name": "Hello World Function",
        "prompt": "Write a Python function that prints 'Hello, World!'",
        "expected_output": "print('Hello, World!')",
        "parameters": {"temperature": 0.3},
    },
    {
        "name": "Sum Function",
        "prompt": "Write a Python function to calculate the sum of two numbers",
        "expected_output": "return a + b",
        "parameters": {"temperature": 0.5},
    },
    {
        "name": "FizzBuzz",
        "prompt": "Write a Python function to solve the FizzBuzz problem",
        "expected_output": "if i % 3 == 0 and i % 5 == 0",
        "parameters": {"temperature": 0.7},
    },
]
|
||||
|
||||
def main() -> None:
    """Run the sample tasks against two mock models and print a summary.

    Builds a ``TestSuite`` named ``"sample_test_results"``, registers two
    ``MockLLM`` instances, runs every entry of ``SAMPLE_TASKS`` through the
    suite, asks the suite to save its results as ``"sample_results.json"``,
    and then prints one summary stanza per collected result followed by
    the total count and the suite's output directory.
    """
    print("Starting LLM Performance Testing Framework")
    print("=" * 50)

    # Create test suite
    suite = TestSuite("sample_test_results")

    # Add some sample models
    print("Adding sample models...")
    suite.add_model(MockLLM("Model_A", {"context": 2048}))
    suite.add_model(MockLLM("Model_B", {"context": 4096}))

    # Run tests
    print("Running sample tests...")
    suite.run_all_tests(SAMPLE_TASKS)

    # Save results
    # NOTE(review): presumably written under suite.output_dir — confirm
    # against TestSuite.save_results.
    print("Saving results...")
    suite.save_results("sample_results.json")

    # Show summary
    print("\n" + "=" * 50)
    print("TEST SUMMARY")
    print("=" * 50)

    # One stanza per result, each closed by a dashed separator line.
    for result in suite.results:
        print(f"Model: {result.model_name}")
        print(f"Task: {result.task_name}")
        print(f"Response Time: {result.response_time:.2f}s")
        print(f"Quality Score: {result.quality_score:.1f}/100")
        print(f"Success: {'Yes' if result.success else 'No'}")
        print("-" * 30)

    print(f"\nTotal tests run: {len(suite.results)}")
    print(f"Results saved to: {suite.output_dir}")
|
||||
|
||||
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user