#!/usr/bin/env python3
"""
Demonstration of the LLM Test Framework.

Registers a pair of mock models, runs them against a small task set,
persists the results to disk, and prints a per-test summary.
"""

import time
import json
from pathlib import Path

from llm_test_framework import TestSuite, MockLLM

# Benchmark tasks: each entry pairs a prompt with the snippet expected in
# the model's answer, plus per-task sampling parameters.
SAMPLE_TASKS = [
    {
        "name": "Hello World Function",
        "prompt": "Write a Python function that prints 'Hello, World!'",
        "expected_output": "print('Hello, World!')",
        "parameters": {"temperature": 0.3},
    },
    {
        "name": "Sum Function",
        "prompt": "Write a Python function to calculate the sum of two numbers",
        "expected_output": "return a + b",
        "parameters": {"temperature": 0.5},
    },
    {
        "name": "FizzBuzz",
        "prompt": "Write a Python function to solve the FizzBuzz problem",
        "expected_output": "if i % 3 == 0 and i % 5 == 0",
        "parameters": {"temperature": 0.7},
    },
]


def _print_summary(harness):
    """Print a banner followed by one stats block per recorded result."""
    divider = "=" * 50
    print("\n" + divider)
    print("TEST SUMMARY")
    print(divider)
    for outcome in harness.results:
        print(f"Model: {outcome.model_name}")
        print(f"Task: {outcome.task_name}")
        print(f"Response Time: {outcome.response_time:.2f}s")
        print(f"Quality Score: {outcome.quality_score:.1f}/100")
        print(f"Success: {'Yes' if outcome.success else 'No'}")
        print("-" * 30)
    print(f"\nTotal tests run: {len(harness.results)}")
    print(f"Results saved to: {harness.output_dir}")


def main():
    """Drive a full sample run: register models, execute tasks, report."""
    print("Starting LLM Performance Testing Framework")
    print("=" * 50)

    # Results land under this directory (managed by TestSuite).
    harness = TestSuite("sample_test_results")

    print("Adding sample models...")
    harness.add_model(MockLLM("Model_A", {"context": 2048}))
    harness.add_model(MockLLM("Model_B", {"context": 4096}))

    print("Running sample tests...")
    harness.run_all_tests(SAMPLE_TASKS)

    print("Saving results...")
    harness.save_results("sample_results.json")

    _print_summary(harness)


if __name__ == "__main__":
    main()