refactor: remove obsolete Nous API test scripts

- Deleted test scripts for Nous API limits, patterns, and temperature checks to streamline the testing suite.
- These scripts were no longer necessary and their removal helps maintain a cleaner codebase.
This commit is contained in:
teknium1 2026-02-21 03:21:13 -08:00
parent a885d2f240
commit cbff1b818c
3 changed files with 0 additions and 416 deletions

View file

@@ -1,176 +0,0 @@
#!/usr/bin/env python3
"""
Test script to diagnose Nous API 400 errors with gemini-2.5-flash model.
This tests various content lengths and parameters to identify what causes failures.
"""
import asyncio
import os
from openai import AsyncOpenAI
from dotenv import load_dotenv

# Load environment variables (expects NOUS_API_KEY in the environment or a .env file)
load_dotenv()

# Initialize the Nous API client.
# The Nous inference endpoint is OpenAI-compatible, so the standard OpenAI SDK
# async client is used with a custom base_url.
nous_client = AsyncOpenAI(
    api_key=os.getenv("NOUS_API_KEY"),  # a missing key surfaces later as an auth error
    base_url="https://inference-api.nousresearch.com/v1"
)

# Model under test; every request in this script targets it.
MODEL = "gemini-2.5-flash"
async def test_api_call(test_name: str, content_length: int, **kwargs):
    """Run one chat-completion call against the Nous API and report the outcome.

    Args:
        test_name: Human-readable label printed in the test banner.
        content_length: Number of filler characters to place in the user prompt.
        **kwargs: Extra parameters forwarded verbatim to
            chat.completions.create (e.g. temperature, max_tokens).

    Returns:
        True if the API call succeeded, False if it raised any exception.
    """
    print(f"\n{'='*60}")
    print(f"Test: {test_name}")
    print(f"Content length: {content_length:,} characters")
    print(f"Additional params: {kwargs}")
    print(f"{'='*60}")
    # Generate test content
    content = "A" * content_length
    system_prompt = """You are an expert content analyst. Your job is to process web content and create a comprehensive yet concise summary that preserves all important information while dramatically reducing bulk.
Create a well-structured markdown summary that includes:
1. Key excerpts (quotes, code snippets, important facts) in their original format
2. Comprehensive summary of all other important information
3. Proper markdown formatting with headers, bullets, and emphasis
Your goal is to preserve ALL important information while reducing length. Never lose key facts, figures, insights, or actionable information. Make it scannable and well-organized."""
    user_prompt = f"""Please process this web content and create a comprehensive markdown summary:
CONTENT TO PROCESS:
{content}
Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""
    try:
        response = await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            **kwargs
        )
        # message.content can be None (e.g. filtered responses or tool-only
        # replies); guard before calling len() so a success isn't reported
        # as a crash.
        result = response.choices[0].message.content or ""
        print("✅ SUCCESS")
        print(f" Response length: {len(result)} characters")
        print(f" Model used: {response.model}")
        print(f" Usage: {response.usage}")
        return True
    except Exception as e:
        # Broad catch is deliberate: this is a diagnostic script and every
        # failure mode (HTTP 400, timeout, SDK error) should be reported.
        print(f"❌ FAILED: {str(e)}")
        return False
async def main():
    """Run the full diagnostic suite and print a pass/fail summary."""
    print("Testing Nous API with gemini-2.5-flash model")
    print(f"API Key present: {'Yes' if os.getenv('NOUS_API_KEY') else 'No'}")
    results = {}

    # Scripted cases: (result key, banner label, content length, request params).
    scripted_cases = [
        ("small", "Small content (5,000 chars)", 5000,
         dict(temperature=0.1, max_tokens=4000)),
        ("medium", "Medium content (20,000 chars)", 20000,
         dict(temperature=0.1, max_tokens=4000)),
        ("large", "Large content (79,625 chars)", 79625,
         dict(temperature=0.1, max_tokens=4000)),
        ("very_large", "Very large content (100,000 chars)", 100000,
         dict(temperature=0.1, max_tokens=4000)),
        ("diff_max_tokens", "Medium content with higher max_tokens", 20000,
         dict(temperature=0.1, max_tokens=8000)),
        ("no_max_tokens", "Medium content without max_tokens", 20000,
         dict(temperature=0.1)),
    ]
    for key, label, length, params in scripted_cases:
        results[key] = await test_api_call(label, length, **params)
        # Brief pause between requests to avoid rate limiting.
        await asyncio.sleep(1)

    # Final case: realistic web content with mixed character classes.
    mixed_content = """
This is a test of web content with various characters:
- Unicode: 你好世界 🌍
- Special chars: <>&"'
- Numbers: 123456789
- Markdown: **bold** _italic_ `code`
- URLs: https://example.com
""" * 1000  # Repeated to build a large realistic payload
    print(f"\n{'='*60}")
    print(f"Test: Mixed content (real-world scenario)")
    print(f"Content length: {len(mixed_content):,} characters")
    print(f"{'='*60}")
    try:
        await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": "Summarize this content."},
                {"role": "user", "content": mixed_content}
            ],
            temperature=0.1,
            max_tokens=4000
        )
        print("✅ SUCCESS")
        results['mixed_content'] = True
    except Exception as e:
        print(f"❌ FAILED: {str(e)}")
        results['mixed_content'] = False

    # Print the summary table.
    print(f"\n{'='*60}")
    print("SUMMARY OF RESULTS:")
    print(f"{'='*60}")
    for test, passed in results.items():
        status = "✅ PASS" if passed else "❌ FAIL"
        print(f"{test:20s}: {status}")
    print(f"\nTotal: {sum(results.values())}/{len(results)} tests passed")


if __name__ == "__main__":
    asyncio.run(main())

View file

@@ -1,131 +0,0 @@
#!/usr/bin/env python3
"""
Test to understand the pattern of failures - it's not about content length!
"""
import asyncio
import os
from openai import AsyncOpenAI
from dotenv import load_dotenv

# Pull NOUS_API_KEY (and any other settings) from the environment / .env file.
load_dotenv()

# OpenAI-compatible async client pointed at the Nous inference endpoint.
nous_client = AsyncOpenAI(
    api_key=os.getenv("NOUS_API_KEY"),
    base_url="https://inference-api.nousresearch.com/v1"
)

# Model under test.
MODEL = "gemini-2.5-flash"
async def quick_test(description: str, content: str, **kwargs):
    """Fire one summarize request and print a single-line pass/fail result.

    Returns True on success, False when the request raises.
    """
    print(f"\n{description} ({len(content):,} chars)...", end=" ")
    try:
        await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": "Summarize this."},
                {"role": "user", "content": content}
            ],
            **kwargs
        )
    except Exception as err:
        # Trim long error messages so each test stays on one line.
        print(f"❌ FAILED: {str(err)[:80]}")
        return False
    print("✅ SUCCESS")
    return True
async def main():
    """Exercise four theories about what triggers Nous API failures."""
    print("Testing different content types and parameters...")

    # Theory 1: repeated characters might trip server-side validation.
    print("\n" + "="*60)
    print("THEORY 1: Repeated characters")
    print("="*60)
    char_cases = [
        ("Repeated 'A's (5k)", "A" * 5000),
        ("Repeated 'A's (79k)", "A" * 79625),
        ("Varied text (5k)", "Test content. " * 400),
        ("Varied text (79k)", "Test content with variety. " * 3000),
    ]
    for idx, (label, body) in enumerate(char_cases):
        if idx:
            await asyncio.sleep(0.5)
        await quick_test(label, body, temperature=0.1, max_tokens=4000)

    # Theory 2: the max_tokens parameter itself.
    print("\n" + "="*60)
    print("THEORY 2: max_tokens parameter")
    print("="*60)
    content = "Test " * 4000  # 20k chars
    token_cases = [
        ("max_tokens=4000", dict(temperature=0.1, max_tokens=4000)),
        ("max_tokens=8000", dict(temperature=0.1, max_tokens=8000)),
        ("max_tokens=2000", dict(temperature=0.1, max_tokens=2000)),
        ("No max_tokens", dict(temperature=0.1)),
    ]
    for idx, (label, params) in enumerate(token_cases):
        if idx:
            await asyncio.sleep(0.5)
        await quick_test(label, content, **params)

    # Theory 3: the temperature parameter.
    print("\n" + "="*60)
    print("THEORY 3: Temperature parameter")
    print("="*60)
    content = "Test " * 4000
    temp_cases = [
        ("temperature=0.1", dict(temperature=0.1, max_tokens=4000)),
        ("temperature=0.0", dict(temperature=0.0, max_tokens=4000)),
        ("temperature=0.5", dict(temperature=0.5, max_tokens=4000)),
        ("No temperature", dict(max_tokens=4000)),
    ]
    for idx, (label, params) in enumerate(temp_cases):
        if idx:
            await asyncio.sleep(0.5)
        await quick_test(label, content, **params)

    # Theory 4: does the length of the system prompt matter?
    print("\n" + "="*60)
    print("THEORY 4: System prompt length")
    print("="*60)
    short_system = "Summarize this."
    long_system = """You are an expert content analyst. Your job is to process web content and create a comprehensive yet concise summary that preserves all important information while dramatically reducing bulk.
Create a well-structured markdown summary that includes:
1. Key excerpts (quotes, code snippets, important facts) in their original format
2. Comprehensive summary of all other important information
3. Proper markdown formatting with headers, bullets, and emphasis
Your goal is to preserve ALL important information while reducing length."""
    content = "A" * 5000

    async def _system_case(label: str, system_text: str) -> None:
        # One-off request that only varies the system prompt.
        print(f"{label}...", end=" ")
        try:
            await nous_client.chat.completions.create(
                model=MODEL,
                messages=[
                    {"role": "system", "content": system_text},
                    {"role": "user", "content": content}
                ],
                temperature=0.1,
                max_tokens=4000
            )
            print("✅ SUCCESS")
        except Exception:
            print("❌ FAILED")

    await _system_case("\nShort system prompt", short_system)
    await asyncio.sleep(0.5)
    await _system_case("Long system prompt", long_system)


if __name__ == "__main__":
    asyncio.run(main())

View file

@@ -1,109 +0,0 @@
#!/usr/bin/env python3
"""
Test to confirm: temperature < 0.3 causes failures on Nous API
"""
import asyncio
import os
from openai import AsyncOpenAI
from dotenv import load_dotenv

# Load NOUS_API_KEY from the environment / .env file.
load_dotenv()

# OpenAI-compatible async client pointed at the Nous inference endpoint.
nous_client = AsyncOpenAI(
    api_key=os.getenv("NOUS_API_KEY"),
    base_url="https://inference-api.nousresearch.com/v1"
)

# Model under test.
MODEL = "gemini-2.5-flash"
async def test_temp(temp_value):
    """Probe one temperature setting; return True when the request succeeds."""
    payload = "Test content. " * 1000  # 14k chars
    print(f"Testing temperature={temp_value}...", end=" ")
    try:
        await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": "Summarize this content."},
                {"role": "user", "content": payload}
            ],
            temperature=temp_value,
            max_tokens=4000
        )
    except Exception:
        print("❌ FAILED")
        return False
    print("✅ SUCCESS")
    return True
async def main():
    """Sweep temperature values, then replicate the exact web_tools.py request."""
    print("Testing temperature threshold for Nous API...")
    print("="*60)
    for temp in (0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 1.0):
        await test_temp(temp)
        await asyncio.sleep(0.5)
    print("="*60)
    print("\nNow testing with ACTUAL web_tools.py content and parameters:")
    print("="*60)
    # Simulate the actual web_tools.py call
    system_prompt = """You are an expert content analyst. Your job is to process web content and create a comprehensive yet concise summary that preserves all important information while dramatically reducing bulk.
Create a well-structured markdown summary that includes:
1. Key excerpts (quotes, code snippets, important facts) in their original format
2. Comprehensive summary of all other important information
3. Proper markdown formatting with headers, bullets, and emphasis
Your goal is to preserve ALL important information while reducing length. Never lose key facts, figures, insights, or actionable information. Make it scannable and well-organized."""
    content = "Sample web page content. " * 3000  # ~75k chars like the real failures
    user_prompt = f"""Please process this web content and create a comprehensive markdown summary:
CONTENT TO PROCESS:
{content}
Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""

    async def _attempt(temperature: float) -> None:
        # Same request body both times; only temperature varies.
        try:
            await nous_client.chat.completions.create(
                model=MODEL,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=temperature,
                max_tokens=4000
            )
            print("✅ SUCCESS")
        except Exception:
            print("❌ FAILED")

    print(f"\nActual web_tools call (temp=0.1, {len(content):,} chars)...", end=" ")
    await _attempt(0.1)
    await asyncio.sleep(0.5)
    print(f"Same call but with temp=0.3...", end=" ")
    await _attempt(0.3)


if __name__ == "__main__":
    asyncio.run(main())