refactor: remove obsolete Nous API test scripts
- Deleted the test scripts for Nous API limits, patterns, and temperature checks to streamline the testing suite.
- These scripts were no longer necessary; removing them keeps the codebase cleaner.
This commit is contained in:
parent
a885d2f240
commit
cbff1b818c
3 changed files with 0 additions and 416 deletions
|
|
@ -1,176 +0,0 @@
|
||||||
#!/usr/bin/env python3
"""
Test script to diagnose Nous API 400 errors with gemini-2.5-flash model.
This tests various content lengths and parameters to identify what causes failures.
"""

import asyncio
import os
from openai import AsyncOpenAI
from dotenv import load_dotenv

# Load environment variables (expects NOUS_API_KEY in a .env file or the shell env)
load_dotenv()

# Initialize the Nous API client (OpenAI-compatible endpoint).
# NOTE(review): if NOUS_API_KEY is unset, api_key is None — the client still
# constructs, but every request will fail with an auth error at call time.
nous_client = AsyncOpenAI(
    api_key=os.getenv("NOUS_API_KEY"),
    base_url="https://inference-api.nousresearch.com/v1"
)

# Model under test for every diagnostic call in this script.
MODEL = "gemini-2.5-flash"
|
|
||||||
|
|
||||||
async def test_api_call(test_name: str, content_length: int, **kwargs) -> bool:
    """Run one diagnostic API call and print a pass/fail report.

    Builds a synthetic payload of ``content_length`` repeated 'A' characters,
    sends it through the standard summarization prompts, and reports the
    outcome to stdout.

    Args:
        test_name: Human-readable label printed in the report header.
        content_length: Number of characters of synthetic content to send.
        **kwargs: Extra parameters forwarded verbatim to
            ``chat.completions.create`` (e.g. ``temperature``, ``max_tokens``).

    Returns:
        True if the API call succeeded, False if it raised.
    """
    print(f"\n{'='*60}")
    print(f"Test: {test_name}")
    print(f"Content length: {content_length:,} characters")
    print(f"Additional params: {kwargs}")
    print(f"{'='*60}")

    # Generate synthetic test content of the requested size.
    content = "A" * content_length

    system_prompt = """You are an expert content analyst. Your job is to process web content and create a comprehensive yet concise summary that preserves all important information while dramatically reducing bulk.

Create a well-structured markdown summary that includes:
1. Key excerpts (quotes, code snippets, important facts) in their original format
2. Comprehensive summary of all other important information
3. Proper markdown formatting with headers, bullets, and emphasis

Your goal is to preserve ALL important information while reducing length. Never lose key facts, figures, insights, or actionable information. Make it scannable and well-organized."""

    user_prompt = f"""Please process this web content and create a comprehensive markdown summary:

CONTENT TO PROCESS:
{content}

Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""

    try:
        response = await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            **kwargs
        )

        result = response.choices[0].message.content
        print(f"✅ SUCCESS")
        # FIX: message.content may be None (the API allows empty/absent
        # content); len(None) would raise TypeError inside this try block and
        # be mis-reported as an API failure. Treat a missing body as length 0.
        print(f" Response length: {len(result) if result is not None else 0} characters")
        print(f" Model used: {response.model}")
        print(f" Usage: {response.usage}")
        return True

    except Exception as e:
        # Diagnostic script: any failure mode is interesting, so the broad
        # catch is deliberate — print it and keep going.
        print(f"❌ FAILED: {str(e)}")
        return False
|
|
||||||
|
|
||||||
async def main():
    """Run all tests.

    Executes a fixed sequence of diagnostic calls of increasing content size
    and varying parameters, then prints a pass/fail summary table. A 1-second
    sleep between calls spaces out requests to the API.
    """
    print("Testing Nous API with gemini-2.5-flash model")
    print(f"API Key present: {'Yes' if os.getenv('NOUS_API_KEY') else 'No'}")

    # Maps test label -> bool (True = API call succeeded).
    results = {}

    # Test 1: Small content (should always work)
    results['small'] = await test_api_call(
        "Small content (5,000 chars)",
        5000,
        temperature=0.1,
        max_tokens=4000
    )
    await asyncio.sleep(1)

    # Test 2: Medium content (around what was failing)
    results['medium'] = await test_api_call(
        "Medium content (20,000 chars)",
        20000,
        temperature=0.1,
        max_tokens=4000
    )
    await asyncio.sleep(1)

    # Test 3: Large content (79,625 chars like the error)
    results['large'] = await test_api_call(
        "Large content (79,625 chars)",
        79625,
        temperature=0.1,
        max_tokens=4000
    )
    await asyncio.sleep(1)

    # Test 4: Very large content (100k chars)
    results['very_large'] = await test_api_call(
        "Very large content (100,000 chars)",
        100000,
        temperature=0.1,
        max_tokens=4000
    )
    await asyncio.sleep(1)

    # Test 5: Same as working case but different max_tokens
    results['diff_max_tokens'] = await test_api_call(
        "Medium content with higher max_tokens",
        20000,
        temperature=0.1,
        max_tokens=8000
    )
    await asyncio.sleep(1)

    # Test 6: No max_tokens specified
    results['no_max_tokens'] = await test_api_call(
        "Medium content without max_tokens",
        20000,
        temperature=0.1
    )
    await asyncio.sleep(1)

    # Test 7: With actual web content (mixed characters)
    mixed_content = """
This is a test of web content with various characters:
- Unicode: 你好世界 🌍
- Special chars: <>&"'
- Numbers: 123456789
- Markdown: **bold** _italic_ `code`
- URLs: https://example.com
""" * 1000  # Repeat to make it ~79k chars

    print(f"\n{'='*60}")
    print(f"Test: Mixed content (real-world scenario)")
    print(f"Content length: {len(mixed_content):,} characters")
    print(f"{'='*60}")

    # This case is inlined (not via test_api_call) because it sends the mixed
    # payload directly as the user message with a short system prompt.
    try:
        response = await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": "Summarize this content."},
                {"role": "user", "content": mixed_content}
            ],
            temperature=0.1,
            max_tokens=4000
        )
        print(f"✅ SUCCESS")
        results['mixed_content'] = True
    except Exception as e:
        print(f"❌ FAILED: {str(e)}")
        results['mixed_content'] = False

    # Summary
    print(f"\n{'='*60}")
    print("SUMMARY OF RESULTS:")
    print(f"{'='*60}")
    for test, passed in results.items():
        status = "✅ PASS" if passed else "❌ FAIL"
        print(f"{test:20s}: {status}")

    # NOTE: `passed` is reused here as the pass count, shadowing the loop
    # variable above — intentional but easy to misread.
    passed = sum(results.values())
    total = len(results)
    print(f"\nTotal: {passed}/{total} tests passed")
|
|
||||||
|
|
||||||
# Script entry point: run the full diagnostic suite on the asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
|
||||||
|
|
||||||
|
|
@ -1,131 +0,0 @@
|
||||||
#!/usr/bin/env python3
"""
Test to understand the pattern of failures - it's not about content length!
"""

import asyncio
import os
from openai import AsyncOpenAI
from dotenv import load_dotenv

# Load NOUS_API_KEY (and any other settings) from .env / the environment.
load_dotenv()

# OpenAI-compatible client pointed at the Nous inference endpoint.
# NOTE(review): api_key is None when NOUS_API_KEY is unset; requests will
# then fail with an auth error rather than at construction time.
nous_client = AsyncOpenAI(
    api_key=os.getenv("NOUS_API_KEY"),
    base_url="https://inference-api.nousresearch.com/v1"
)

# Model under test for all theory checks below.
MODEL = "gemini-2.5-flash"
|
|
||||||
|
|
||||||
async def quick_test(description: str, content: str, **kwargs) -> bool:
    """Quick API test.

    Sends ``content`` as the user message with a minimal system prompt and
    prints a one-line pass/fail result.

    Args:
        description: Label printed before the result marker.
        content: User-message payload to send.
        **kwargs: Extra parameters forwarded verbatim to
            ``chat.completions.create`` (e.g. ``temperature``, ``max_tokens``).

    Returns:
        True on success, False if the call raised.
    """
    print(f"\n{description} ({len(content):,} chars)...", end=" ")

    try:
        # Only success/failure matters here, so the response object is not
        # bound (the original assigned it to an unused local).
        await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": "Summarize this."},
                {"role": "user", "content": content}
            ],
            **kwargs
        )
        print(f"✅ SUCCESS")
        return True
    except Exception as e:
        # Truncate long error payloads so each test stays on one report line.
        print(f"❌ FAILED: {str(e)[:80]}")
        return False
|
|
||||||
|
|
||||||
async def main():
    """Run four failure-pattern theories against the API in sequence.

    Each "theory" section varies one factor (content repetition, max_tokens,
    temperature, system-prompt length) while holding the rest fixed, with a
    0.5s pause between calls to space out requests.
    """
    print("Testing different content types and parameters...")

    # Theory 1: Repeated characters trigger validation
    print("\n" + "="*60)
    print("THEORY 1: Repeated characters")
    print("="*60)
    await quick_test("Repeated 'A's (5k)", "A" * 5000, temperature=0.1, max_tokens=4000)
    await asyncio.sleep(0.5)
    await quick_test("Repeated 'A's (79k)", "A" * 79625, temperature=0.1, max_tokens=4000)
    await asyncio.sleep(0.5)
    await quick_test("Varied text (5k)", "Test content. " * 400, temperature=0.1, max_tokens=4000)
    await asyncio.sleep(0.5)
    await quick_test("Varied text (79k)", "Test content with variety. " * 3000, temperature=0.1, max_tokens=4000)

    # Theory 2: max_tokens parameter
    print("\n" + "="*60)
    print("THEORY 2: max_tokens parameter")
    print("="*60)
    content = "Test " * 4000  # 20k chars
    await quick_test("max_tokens=4000", content, temperature=0.1, max_tokens=4000)
    await asyncio.sleep(0.5)
    await quick_test("max_tokens=8000", content, temperature=0.1, max_tokens=8000)
    await asyncio.sleep(0.5)
    await quick_test("max_tokens=2000", content, temperature=0.1, max_tokens=2000)
    await asyncio.sleep(0.5)
    await quick_test("No max_tokens", content, temperature=0.1)

    # Theory 3: Temperature parameter
    print("\n" + "="*60)
    print("THEORY 3: Temperature parameter")
    print("="*60)
    content = "Test " * 4000
    await quick_test("temperature=0.1", content, temperature=0.1, max_tokens=4000)
    await asyncio.sleep(0.5)
    await quick_test("temperature=0.0", content, temperature=0.0, max_tokens=4000)
    await asyncio.sleep(0.5)
    await quick_test("temperature=0.5", content, temperature=0.5, max_tokens=4000)
    await asyncio.sleep(0.5)
    await quick_test("No temperature", content, max_tokens=4000)

    # Theory 4: System prompt impact
    print("\n" + "="*60)
    print("THEORY 4: System prompt length")
    print("="*60)

    short_system = "Summarize this."
    long_system = """You are an expert content analyst. Your job is to process web content and create a comprehensive yet concise summary that preserves all important information while dramatically reducing bulk.

Create a well-structured markdown summary that includes:
1. Key excerpts (quotes, code snippets, important facts) in their original format
2. Comprehensive summary of all other important information
3. Proper markdown formatting with headers, bullets, and emphasis

Your goal is to preserve ALL important information while reducing length."""

    content = "A" * 5000

    # These two calls are made directly (not via quick_test) so the system
    # prompt itself can be varied; `response` and `e` are bound but unused.
    print(f"\nShort system prompt...", end=" ")
    try:
        response = await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": short_system},
                {"role": "user", "content": content}
            ],
            temperature=0.1,
            max_tokens=4000
        )
        print(f"✅ SUCCESS")
    except Exception as e:
        print(f"❌ FAILED")

    await asyncio.sleep(0.5)

    print(f"Long system prompt...", end=" ")
    try:
        response = await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": long_system},
                {"role": "user", "content": content}
            ],
            temperature=0.1,
            max_tokens=4000
        )
        print(f"✅ SUCCESS")
    except Exception as e:
        print(f"❌ FAILED")
|
|
||||||
|
|
||||||
# Script entry point: run all theory checks on the asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
|
||||||
|
|
||||||
|
|
@ -1,109 +0,0 @@
|
||||||
#!/usr/bin/env python3
"""
Test to confirm: temperature < 0.3 causes failures on Nous API
"""

import asyncio
import os
from openai import AsyncOpenAI
from dotenv import load_dotenv

# Load NOUS_API_KEY (and any other settings) from .env / the environment.
load_dotenv()

# OpenAI-compatible client pointed at the Nous inference endpoint.
# NOTE(review): api_key is None when NOUS_API_KEY is unset; requests will
# then fail with an auth error rather than at construction time.
nous_client = AsyncOpenAI(
    api_key=os.getenv("NOUS_API_KEY"),
    base_url="https://inference-api.nousresearch.com/v1"
)

# Model under test for the temperature sweep below.
MODEL = "gemini-2.5-flash"
|
|
||||||
|
|
||||||
async def test_temp(temp_value):
    """Issue one summarization request at the given temperature.

    Prints an inline ✅/❌ marker for the result and reports whether the
    API call completed without raising.
    """
    # ~14k characters of benign filler text, same payload for every run.
    content = "Test content. " * 1000  # 14k chars

    print(f"Testing temperature={temp_value}...", end=" ")

    request_messages = [
        {"role": "system", "content": "Summarize this content."},
        {"role": "user", "content": content}
    ]
    try:
        response = await nous_client.chat.completions.create(
            model=MODEL,
            messages=request_messages,
            temperature=temp_value,
            max_tokens=4000
        )
    except Exception:
        print(f"❌ FAILED")
        return False
    print(f"✅ SUCCESS")
    return True
|
|
||||||
|
|
||||||
async def main():
    """Sweep temperatures, then replay the real web_tools.py call shape.

    First runs test_temp over a range of temperature values, then reproduces
    the exact prompts and parameters used by web_tools.py at temp=0.1 and
    temp=0.3 to compare outcomes.
    """
    print("Testing temperature threshold for Nous API...")
    print("="*60)

    # Sweep across the suspected threshold (0.3) in both directions.
    temps = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 1.0]

    for temp in temps:
        await test_temp(temp)
        await asyncio.sleep(0.5)

    print("="*60)
    print("\nNow testing with ACTUAL web_tools.py content and parameters:")
    print("="*60)

    # Simulate the actual web_tools.py call
    system_prompt = """You are an expert content analyst. Your job is to process web content and create a comprehensive yet concise summary that preserves all important information while dramatically reducing bulk.

Create a well-structured markdown summary that includes:
1. Key excerpts (quotes, code snippets, important facts) in their original format
2. Comprehensive summary of all other important information
3. Proper markdown formatting with headers, bullets, and emphasis

Your goal is to preserve ALL important information while reducing length. Never lose key facts, figures, insights, or actionable information. Make it scannable and well-organized."""

    content = "Sample web page content. " * 3000  # ~75k chars like the real failures

    user_prompt = f"""Please process this web content and create a comprehensive markdown summary:

CONTENT TO PROCESS:
{content}

Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""

    # Same prompts/params as the failing production call, at temp=0.1.
    print(f"\nActual web_tools call (temp=0.1, {len(content):,} chars)...", end=" ")
    try:
        response = await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.1,
            max_tokens=4000
        )
        print(f"✅ SUCCESS")
    except Exception:
        print(f"❌ FAILED")

    await asyncio.sleep(0.5)

    # Identical call with only the temperature raised above the threshold.
    print(f"Same call but with temp=0.3...", end=" ")
    try:
        response = await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.3,
            max_tokens=4000
        )
        print(f"✅ SUCCESS")
    except Exception:
        print(f"❌ FAILED")
|
|
||||||
|
|
||||||
# Script entry point: run the temperature sweep on the asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue