# Source: localgenai/testing/qwen3-coder-30b/example_usage.py
# (originally 70 lines, 2.0 KiB; copied from a web "Raw" view)
#!/usr/bin/env python3
"""
Example usage of the LLM Test Framework with real models
"""
import time
import json
from pathlib import Path
from llm_test_framework import TestSuite, MockLLM
# Example tasks for testing.
# Each task dict carries:
#   name            - human-readable label used in reports
#   prompt          - the instruction sent to the model under test
#   expected_output - snippet the scorer presumably matches against the
#                     model's reply — TODO confirm against llm_test_framework
#   parameters      - per-task generation settings (only temperature here)
SAMPLE_TASKS = [
    {
        "name": "Hello World Function",
        "prompt": "Write a Python function that prints 'Hello, World!'",
        "expected_output": "print('Hello, World!')",
        "parameters": {"temperature": 0.3}
    },
    {
        "name": "Sum Function",
        "prompt": "Write a Python function to calculate the sum of two numbers",
        "expected_output": "return a + b",
        "parameters": {"temperature": 0.5}
    },
    {
        "name": "FizzBuzz",
        "prompt": "Write a Python function to solve the FizzBuzz problem",
        "expected_output": "if i % 3 == 0 and i % 5 == 0",
        "parameters": {"temperature": 0.7}
    }
]
def main():
    """Run one end-to-end demo pass of the LLM test framework.

    Registers two mock models, executes SAMPLE_TASKS against each,
    persists the results to JSON, and prints a per-result summary.
    """
    banner = "=" * 50  # reused visual separator for section headers

    print("Starting LLM Performance Testing Framework")
    print(banner)

    # Create the suite; the argument names its output directory.
    suite = TestSuite("sample_test_results")

    # Register two mock models with different context-window configs.
    print("Adding sample models...")
    for mock in (
        MockLLM("Model_A", {"context": 2048}),
        MockLLM("Model_B", {"context": 4096}),
    ):
        suite.add_model(mock)

    print("Running sample tests...")
    suite.run_all_tests(SAMPLE_TASKS)

    print("Saving results...")
    suite.save_results("sample_results.json")

    # Per-result summary table.
    print("\n" + banner)
    print("TEST SUMMARY")
    print(banner)
    for res in suite.results:
        print(f"Model: {res.model_name}")
        print(f"Task: {res.task_name}")
        print(f"Response Time: {res.response_time:.2f}s")
        print(f"Quality Score: {res.quality_score:.1f}/100")
        print(f"Success: {'Yes' if res.success else 'No'}")
        print("-" * 30)
    print(f"\nTotal tests run: {len(suite.results)}")
    print(f"Results saved to: {suite.output_dir}")


if __name__ == "__main__":
    main()