Initial commit: localgenai stack

Containerized local LLM stack for the Framework Desktop / Strix Halo, plus the OpenCode harness on the Mac side.

- pyinfra/framework/: pyinfra deploy targeting the box
  - llama.cpp (Vulkan), vLLM (ROCm), Ollama (ROCm with HSA override for gfx1151), OpenWebUI
  - Beszel (host + container + AMD GPU dashboard via sysfs)
  - OpenLIT (LLM fleet metrics)
  - Phoenix (per-trace agent waterfall)
  - OpenHands (autonomous agent in a Docker sandbox)
- opencode/: OpenCode config + Phoenix bridge plugin (OTel exporter)
  - install.sh deploys to ~/.config/opencode/
- StrixHaloSetup.md / StrixHaloMemory.md / Roadmap.md / TODO.md: documentation and planning
- testing/qwen3-coder-30b/: small evaluation harness

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 11:35:10 -04:00
commit 2c4bfefa95
36 changed files with 5265 additions and 0 deletions
--- a/testing/qwen3-coder-30b/example_usage.py
+++ b/testing/qwen3-coder-30b/example_usage.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+"""
+Example usage of the LLM Test Framework with mock models (MockLLM stand-ins)
+"""
+
+import time
+import json
+from pathlib import Path
+from llm_test_framework import TestSuite, MockLLM
+
+# Tasks passed to the suite: name, prompt, expected snippet, and a parameters dict.
+SAMPLE_TASKS = [
+    {
+        "name": "Hello World Function",
+        "prompt": "Write a Python function that prints 'Hello, World!'",
+        "expected_output": "print('Hello, World!')",
+        "parameters": {"temperature": 0.3},
+    },
+    {
+        "name": "Sum Function",
+        "prompt": "Write a Python function to calculate the sum of two numbers",
+        "expected_output": "return a + b",
+        "parameters": {"temperature": 0.5},
+    },
+    {
+        "name": "FizzBuzz",
+        "prompt": "Write a Python function to solve the FizzBuzz problem",
+        "expected_output": "if i % 3 == 0 and i % 5 == 0",
+        "parameters": {"temperature": 0.7},
+    },
+]
+
+def main():
+    """Demo run: register two mock models, run SAMPLE_TASKS, save and print results."""
+    rule = "=" * 50
+    print("Starting LLM Performance Testing Framework")
+    print(rule)
+
+    suite = TestSuite("sample_test_results")
+
+    # Register two mock models that differ only in name and "context" value.
+    print("Adding sample models...")
+    for model_name, context in (("Model_A", 2048), ("Model_B", 4096)):
+        suite.add_model(MockLLM(model_name, {"context": context}))
+
+    print("Running sample tests...")
+    suite.run_all_tests(SAMPLE_TASKS)
+
+    print("Saving results...")
+    suite.save_results("sample_results.json")
+
+    # Summary: one stanza per entry in suite.results.
+    print("\n" + rule)
+    print("TEST SUMMARY")
+    print(rule)
+
+    for entry in suite.results:
+        print(f"Model: {entry.model_name}")
+        print(f"Task: {entry.task_name}")
+        print(f"Response Time: {entry.response_time:.2f}s")
+        print(f"Quality Score: {entry.quality_score:.1f}/100")
+        verdict = "Yes" if entry.success else "No"
+        print(f"Success: {verdict}")
+        print("-" * 30)
+
+    print(f"\nTotal tests run: {len(suite.results)}")
+    print(f"Results saved to: {suite.output_dir}")
+
+if __name__ == "__main__":
+    main()