diff --git a/tests/test_nous_api_limits.py b/tests/test_nous_api_limits.py deleted file mode 100755 index 25265a0c..00000000 --- a/tests/test_nous_api_limits.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script to diagnose Nous API 400 errors with gemini-2.5-flash model. -This tests various content lengths and parameters to identify what causes failures. -""" - -import asyncio -import os -from openai import AsyncOpenAI -from dotenv import load_dotenv - -# Load environment variables -load_dotenv() - -# Initialize the Nous API client -nous_client = AsyncOpenAI( - api_key=os.getenv("NOUS_API_KEY"), - base_url="https://inference-api.nousresearch.com/v1" -) - -MODEL = "gemini-2.5-flash" - -async def test_api_call(test_name: str, content_length: int, **kwargs): - """Test an API call with specific parameters.""" - print(f"\n{'='*60}") - print(f"Test: {test_name}") - print(f"Content length: {content_length:,} characters") - print(f"Additional params: {kwargs}") - print(f"{'='*60}") - - # Generate test content - content = "A" * content_length - - system_prompt = """You are an expert content analyst. Your job is to process web content and create a comprehensive yet concise summary that preserves all important information while dramatically reducing bulk. - -Create a well-structured markdown summary that includes: -1. Key excerpts (quotes, code snippets, important facts) in their original format -2. Comprehensive summary of all other important information -3. Proper markdown formatting with headers, bullets, and emphasis - -Your goal is to preserve ALL important information while reducing length. Never lose key facts, figures, insights, or actionable information. Make it scannable and well-organized.""" - - user_prompt = f"""Please process this web content and create a comprehensive markdown summary: - -CONTENT TO PROCESS: -{content} - -Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights.""" - - try: - response = await nous_client.chat.completions.create( - model=MODEL, - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt} - ], - **kwargs - ) - - result = response.choices[0].message.content - print(f"✅ SUCCESS") - print(f" Response length: {len(result)} characters") - print(f" Model used: {response.model}") - print(f" Usage: {response.usage}") - return True - - except Exception as e: - print(f"❌ FAILED: {str(e)}") - return False - -async def main(): - """Run all tests.""" - print("Testing Nous API with gemini-2.5-flash model") - print(f"API Key present: {'Yes' if os.getenv('NOUS_API_KEY') else 'No'}") - - results = {} - - # Test 1: Small content (should always work) - results['small'] = await test_api_call( - "Small content (5,000 chars)", - 5000, - temperature=0.1, - max_tokens=4000 - ) - await asyncio.sleep(1) - - # Test 2: Medium content (around what was failing) - results['medium'] = await test_api_call( - "Medium content (20,000 chars)", - 20000, - temperature=0.1, - max_tokens=4000 - ) - await asyncio.sleep(1) - - # Test 3: Large content (79,625 chars like the error) - results['large'] = await test_api_call( - "Large content (79,625 chars)", - 79625, - temperature=0.1, - max_tokens=4000 - ) - await asyncio.sleep(1) - - # Test 4: Very large content (100k chars) - results['very_large'] = await test_api_call( - "Very large content (100,000 chars)", - 100000, - temperature=0.1, - max_tokens=4000 - ) - await asyncio.sleep(1) - - # Test 5: Same as working case but different max_tokens - results['diff_max_tokens'] = await test_api_call( - "Medium content with higher max_tokens", - 20000, - temperature=0.1, - max_tokens=8000 - ) - await asyncio.sleep(1) - - # Test 6: No max_tokens specified - results['no_max_tokens'] = await test_api_call( - "Medium content without max_tokens", - 20000, - temperature=0.1 - ) - await asyncio.sleep(1) - - # Test 7: With actual web content (mixed characters) - mixed_content = """ - This is a test of web content with various characters: - - Unicode: 你好世界 🌍 - - Special chars: <>&"' - - Numbers: 123456789 - - Markdown: **bold** _italic_ `code` - - URLs: https://example.com - """ * 1000 # Repeat to make it ~79k chars - - print(f"\n{'='*60}") - print(f"Test: Mixed content (real-world scenario)") - print(f"Content length: {len(mixed_content):,} characters") - print(f"{'='*60}") - - try: - response = await nous_client.chat.completions.create( - model=MODEL, - messages=[ - {"role": "system", "content": "Summarize this content."}, - {"role": "user", "content": mixed_content} - ], - temperature=0.1, - max_tokens=4000 - ) - print(f"✅ SUCCESS") - results['mixed_content'] = True - except Exception as e: - print(f"❌ FAILED: {str(e)}") - results['mixed_content'] = False - - # Summary - print(f"\n{'='*60}") - print("SUMMARY OF RESULTS:") - print(f"{'='*60}") - for test, passed in results.items(): - status = "✅ PASS" if passed else "❌ FAIL" - print(f"{test:20s}: {status}") - - passed = sum(results.values()) - total = len(results) - print(f"\nTotal: {passed}/{total} tests passed") - -if __name__ == "__main__": - asyncio.run(main()) - diff --git a/tests/test_nous_api_pattern.py b/tests/test_nous_api_pattern.py deleted file mode 100644 index d450a6dc..00000000 --- a/tests/test_nous_api_pattern.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python3 -""" -Test to understand the pattern of failures - it's not about content length! -""" - -import asyncio -import os -from openai import AsyncOpenAI -from dotenv import load_dotenv - -load_dotenv() - -nous_client = AsyncOpenAI( - api_key=os.getenv("NOUS_API_KEY"), - base_url="https://inference-api.nousresearch.com/v1" -) - -MODEL = "gemini-2.5-flash" - -async def quick_test(description: str, content: str, **kwargs): - """Quick API test.""" - print(f"\n{description} ({len(content):,} chars)...", end=" ") - - try: - response = await nous_client.chat.completions.create( - model=MODEL, - messages=[ - {"role": "system", "content": "Summarize this."}, - {"role": "user", "content": content} - ], - **kwargs - ) - print(f"✅ SUCCESS") - return True - except Exception as e: - print(f"❌ FAILED: {str(e)[:80]}") - return False - -async def main(): - print("Testing different content types and parameters...") - - # Theory 1: Repeated characters trigger validation - print("\n" + "="*60) - print("THEORY 1: Repeated characters") - print("="*60) - await quick_test("Repeated 'A's (5k)", "A" * 5000, temperature=0.1, max_tokens=4000) - await asyncio.sleep(0.5) - await quick_test("Repeated 'A's (79k)", "A" * 79625, temperature=0.1, max_tokens=4000) - await asyncio.sleep(0.5) - await quick_test("Varied text (5k)", "Test content. " * 400, temperature=0.1, max_tokens=4000) - await asyncio.sleep(0.5) - await quick_test("Varied text (79k)", "Test content with variety. " * 3000, temperature=0.1, max_tokens=4000) - - # Theory 2: max_tokens parameter - print("\n" + "="*60) - print("THEORY 2: max_tokens parameter") - print("="*60) - content = "Test " * 4000 # 20k chars - await quick_test("max_tokens=4000", content, temperature=0.1, max_tokens=4000) - await asyncio.sleep(0.5) - await quick_test("max_tokens=8000", content, temperature=0.1, max_tokens=8000) - await asyncio.sleep(0.5) - await quick_test("max_tokens=2000", content, temperature=0.1, max_tokens=2000) - await asyncio.sleep(0.5) - await quick_test("No max_tokens", content, temperature=0.1) - - # Theory 3: Temperature parameter - print("\n" + "="*60) - print("THEORY 3: Temperature parameter") - print("="*60) - content = "Test " * 4000 - await quick_test("temperature=0.1", content, temperature=0.1, max_tokens=4000) - await asyncio.sleep(0.5) - await quick_test("temperature=0.0", content, temperature=0.0, max_tokens=4000) - await asyncio.sleep(0.5) - await quick_test("temperature=0.5", content, temperature=0.5, max_tokens=4000) - await asyncio.sleep(0.5) - await quick_test("No temperature", content, max_tokens=4000) - - # Theory 4: System prompt impact - print("\n" + "="*60) - print("THEORY 4: System prompt length") - print("="*60) - - short_system = "Summarize this." - long_system = """You are an expert content analyst. Your job is to process web content and create a comprehensive yet concise summary that preserves all important information while dramatically reducing bulk. - -Create a well-structured markdown summary that includes: -1. Key excerpts (quotes, code snippets, important facts) in their original format -2. Comprehensive summary of all other important information -3. Proper markdown formatting with headers, bullets, and emphasis - -Your goal is to preserve ALL important information while reducing length.""" - - content = "A" * 5000 - - print(f"\nShort system prompt...", end=" ") - try: - response = await nous_client.chat.completions.create( - model=MODEL, - messages=[ - {"role": "system", "content": short_system}, - {"role": "user", "content": content} - ], - temperature=0.1, - max_tokens=4000 - ) - print(f"✅ SUCCESS") - except Exception as e: - print(f"❌ FAILED") - - await asyncio.sleep(0.5) - - print(f"Long system prompt...", end=" ") - try: - response = await nous_client.chat.completions.create( - model=MODEL, - messages=[ - {"role": "system", "content": long_system}, - {"role": "user", "content": content} - ], - temperature=0.1, - max_tokens=4000 - ) - print(f"✅ SUCCESS") - except Exception as e: - print(f"❌ FAILED") - -if __name__ == "__main__": - asyncio.run(main()) - diff --git a/tests/test_temperature_fix.py b/tests/test_temperature_fix.py deleted file mode 100644 index c0408652..00000000 --- a/tests/test_temperature_fix.py +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/env python3 -""" -Test to confirm: temperature < 0.3 causes failures on Nous API -""" - -import asyncio -import os -from openai import AsyncOpenAI -from dotenv import load_dotenv - -load_dotenv() - -nous_client = AsyncOpenAI( - api_key=os.getenv("NOUS_API_KEY"), - base_url="https://inference-api.nousresearch.com/v1" -) - -MODEL = "gemini-2.5-flash" - -async def test_temp(temp_value): - """Test a specific temperature value.""" - content = "Test content. " * 1000 # 14k chars - - print(f"Testing temperature={temp_value}...", end=" ") - - try: - response = await nous_client.chat.completions.create( - model=MODEL, - messages=[ - {"role": "system", "content": "Summarize this content."}, - {"role": "user", "content": content} - ], - temperature=temp_value, - max_tokens=4000 - ) - print(f"✅ SUCCESS") - return True - except Exception as e: - print(f"❌ FAILED") - return False - -async def main(): - print("Testing temperature threshold for Nous API...") - print("="*60) - - temps = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 1.0] - - for temp in temps: - await test_temp(temp) - await asyncio.sleep(0.5) - - print("="*60) - print("\nNow testing with ACTUAL web_tools.py content and parameters:") - print("="*60) - - # Simulate the actual web_tools.py call - system_prompt = """You are an expert content analyst. Your job is to process web content and create a comprehensive yet concise summary that preserves all important information while dramatically reducing bulk. - -Create a well-structured markdown summary that includes: -1. Key excerpts (quotes, code snippets, important facts) in their original format -2. Comprehensive summary of all other important information -3. Proper markdown formatting with headers, bullets, and emphasis - -Your goal is to preserve ALL important information while reducing length. Never lose key facts, figures, insights, or actionable information. Make it scannable and well-organized.""" - - content = "Sample web page content. " * 3000 # ~75k chars like the real failures - - user_prompt = f"""Please process this web content and create a comprehensive markdown summary: - -CONTENT TO PROCESS: -{content} - -Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights.""" - - print(f"\nActual web_tools call (temp=0.1, {len(content):,} chars)...", end=" ") - try: - response = await nous_client.chat.completions.create( - model=MODEL, - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt} - ], - temperature=0.1, - max_tokens=4000 - ) - print(f"✅ SUCCESS") - except Exception: - print(f"❌ FAILED") - - await asyncio.sleep(0.5) - - print(f"Same call but with temp=0.3...", end=" ") - try: - response = await nous_client.chat.completions.create( - model=MODEL, - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt} - ], - temperature=0.3, - max_tokens=4000 - ) - print(f"✅ SUCCESS") - except Exception: - print(f"❌ FAILED") - -if __name__ == "__main__": - asyncio.run(main()) -