refactor: remove obsolete Nous API test scripts

- Deleted test scripts for Nous API limits, patterns, and temperature checks to streamline the testing suite.
- These scripts were no longer necessary and their removal helps maintain a cleaner codebase.
This commit is contained in:
teknium1 2026-02-21 03:21:13 -08:00
parent a885d2f240
commit cbff1b818c
3 changed files with 0 additions and 416 deletions

View file

@@ -1,176 +0,0 @@
#!/usr/bin/env python3
"""
Test script to diagnose Nous API 400 errors with gemini-2.5-flash model.
This tests various content lengths and parameters to identify what causes failures.
"""
import asyncio
import os
from openai import AsyncOpenAI
from dotenv import load_dotenv

# Load environment variables (expects NOUS_API_KEY in the environment or a .env file)
load_dotenv()

# Initialize the Nous API client.
# The Nous inference endpoint is OpenAI-compatible, so the standard OpenAI SDK
# async client is used with a custom base_url.
nous_client = AsyncOpenAI(
    api_key=os.getenv("NOUS_API_KEY"),  # a missing key surfaces later as an auth error
    base_url="https://inference-api.nousresearch.com/v1"
)

# Model under test; every request in this script targets it.
MODEL = "gemini-2.5-flash"
async def test_api_call(test_name: str, content_length: int, **kwargs):
    """Run one chat-completion call against the Nous API and report the outcome.

    Args:
        test_name: Human-readable label printed in the test banner.
        content_length: Number of filler characters to place in the user prompt.
        **kwargs: Extra parameters forwarded verbatim to
            chat.completions.create (e.g. temperature, max_tokens).

    Returns:
        True if the API call succeeded, False if it raised any exception.
    """
    print(f"\n{'='*60}")
    print(f"Test: {test_name}")
    print(f"Content length: {content_length:,} characters")
    print(f"Additional params: {kwargs}")
    print(f"{'='*60}")
    # Generate test content
    content = "A" * content_length
    system_prompt = """You are an expert content analyst. Your job is to process web content and create a comprehensive yet concise summary that preserves all important information while dramatically reducing bulk.
Create a well-structured markdown summary that includes:
1. Key excerpts (quotes, code snippets, important facts) in their original format
2. Comprehensive summary of all other important information
3. Proper markdown formatting with headers, bullets, and emphasis
Your goal is to preserve ALL important information while reducing length. Never lose key facts, figures, insights, or actionable information. Make it scannable and well-organized."""
    user_prompt = f"""Please process this web content and create a comprehensive markdown summary:
CONTENT TO PROCESS:
{content}
Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""
    try:
        response = await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            **kwargs
        )
        # message.content can be None (e.g. filtered responses or tool-only
        # replies); guard before calling len() so a success isn't reported
        # as a crash.
        result = response.choices[0].message.content or ""
        print("✅ SUCCESS")
        print(f" Response length: {len(result)} characters")
        print(f" Model used: {response.model}")
        print(f" Usage: {response.usage}")
        return True
    except Exception as e:
        # Broad catch is deliberate: this is a diagnostic script and every
        # failure mode (HTTP 400, timeout, SDK error) should be reported.
        print(f"❌ FAILED: {str(e)}")
        return False
async def main():
    """Run the full diagnostic suite and print a pass/fail summary."""
    print("Testing Nous API with gemini-2.5-flash model")
    print(f"API Key present: {'Yes' if os.getenv('NOUS_API_KEY') else 'No'}")
    results = {}

    # Scripted cases: (result key, banner label, content length, request params).
    scripted_cases = [
        ("small", "Small content (5,000 chars)", 5000,
         dict(temperature=0.1, max_tokens=4000)),
        ("medium", "Medium content (20,000 chars)", 20000,
         dict(temperature=0.1, max_tokens=4000)),
        ("large", "Large content (79,625 chars)", 79625,
         dict(temperature=0.1, max_tokens=4000)),
        ("very_large", "Very large content (100,000 chars)", 100000,
         dict(temperature=0.1, max_tokens=4000)),
        ("diff_max_tokens", "Medium content with higher max_tokens", 20000,
         dict(temperature=0.1, max_tokens=8000)),
        ("no_max_tokens", "Medium content without max_tokens", 20000,
         dict(temperature=0.1)),
    ]
    for key, label, length, params in scripted_cases:
        results[key] = await test_api_call(label, length, **params)
        # Brief pause between requests to avoid rate limiting.
        await asyncio.sleep(1)

    # Final case: realistic web content with mixed character classes.
    mixed_content = """
This is a test of web content with various characters:
- Unicode: 你好世界 🌍
- Special chars: <>&"'
- Numbers: 123456789
- Markdown: **bold** _italic_ `code`
- URLs: https://example.com
""" * 1000  # Repeated to build a large realistic payload
    print(f"\n{'='*60}")
    print(f"Test: Mixed content (real-world scenario)")
    print(f"Content length: {len(mixed_content):,} characters")
    print(f"{'='*60}")
    try:
        await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": "Summarize this content."},
                {"role": "user", "content": mixed_content}
            ],
            temperature=0.1,
            max_tokens=4000
        )
        print("✅ SUCCESS")
        results['mixed_content'] = True
    except Exception as e:
        print(f"❌ FAILED: {str(e)}")
        results['mixed_content'] = False

    # Print the summary table.
    print(f"\n{'='*60}")
    print("SUMMARY OF RESULTS:")
    print(f"{'='*60}")
    for test, passed in results.items():
        status = "✅ PASS" if passed else "❌ FAIL"
        print(f"{test:20s}: {status}")
    print(f"\nTotal: {sum(results.values())}/{len(results)} tests passed")


if __name__ == "__main__":
    asyncio.run(main())

View file

@@ -1,131 +0,0 @@
#!/usr/bin/env python3
"""
Test to understand the pattern of failures - it's not about content length!
"""
import asyncio
import os
from openai import AsyncOpenAI
from dotenv import load_dotenv

# Pull NOUS_API_KEY (and any other settings) from the environment / .env file.
load_dotenv()

# OpenAI-compatible async client pointed at the Nous inference endpoint.
nous_client = AsyncOpenAI(
    api_key=os.getenv("NOUS_API_KEY"),
    base_url="https://inference-api.nousresearch.com/v1"
)

# Model under test.
MODEL = "gemini-2.5-flash"
async def quick_test(description: str, content: str, **kwargs):
    """Fire one summarize request and print a single-line pass/fail result.

    Returns True on success, False when the request raises.
    """
    print(f"\n{description} ({len(content):,} chars)...", end=" ")
    try:
        await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": "Summarize this."},
                {"role": "user", "content": content}
            ],
            **kwargs
        )
    except Exception as err:
        # Trim long error messages so each test stays on one line.
        print(f"❌ FAILED: {str(err)[:80]}")
        return False
    print("✅ SUCCESS")
    return True
async def main():
    """Exercise four theories about what triggers Nous API failures."""
    print("Testing different content types and parameters...")

    # Theory 1: repeated characters might trip server-side validation.
    print("\n" + "="*60)
    print("THEORY 1: Repeated characters")
    print("="*60)
    char_cases = [
        ("Repeated 'A's (5k)", "A" * 5000),
        ("Repeated 'A's (79k)", "A" * 79625),
        ("Varied text (5k)", "Test content. " * 400),
        ("Varied text (79k)", "Test content with variety. " * 3000),
    ]
    for idx, (label, body) in enumerate(char_cases):
        if idx:
            await asyncio.sleep(0.5)
        await quick_test(label, body, temperature=0.1, max_tokens=4000)

    # Theory 2: the max_tokens parameter itself.
    print("\n" + "="*60)
    print("THEORY 2: max_tokens parameter")
    print("="*60)
    content = "Test " * 4000  # 20k chars
    token_cases = [
        ("max_tokens=4000", dict(temperature=0.1, max_tokens=4000)),
        ("max_tokens=8000", dict(temperature=0.1, max_tokens=8000)),
        ("max_tokens=2000", dict(temperature=0.1, max_tokens=2000)),
        ("No max_tokens", dict(temperature=0.1)),
    ]
    for idx, (label, params) in enumerate(token_cases):
        if idx:
            await asyncio.sleep(0.5)
        await quick_test(label, content, **params)

    # Theory 3: the temperature parameter.
    print("\n" + "="*60)
    print("THEORY 3: Temperature parameter")
    print("="*60)
    content = "Test " * 4000
    temp_cases = [
        ("temperature=0.1", dict(temperature=0.1, max_tokens=4000)),
        ("temperature=0.0", dict(temperature=0.0, max_tokens=4000)),
        ("temperature=0.5", dict(temperature=0.5, max_tokens=4000)),
        ("No temperature", dict(max_tokens=4000)),
    ]
    for idx, (label, params) in enumerate(temp_cases):
        if idx:
            await asyncio.sleep(0.5)
        await quick_test(label, content, **params)

    # Theory 4: does the length of the system prompt matter?
    print("\n" + "="*60)
    print("THEORY 4: System prompt length")
    print("="*60)
    short_system = "Summarize this."
    long_system = """You are an expert content analyst. Your job is to process web content and create a comprehensive yet concise summary that preserves all important information while dramatically reducing bulk.
Create a well-structured markdown summary that includes:
1. Key excerpts (quotes, code snippets, important facts) in their original format
2. Comprehensive summary of all other important information
3. Proper markdown formatting with headers, bullets, and emphasis
Your goal is to preserve ALL important information while reducing length."""
    content = "A" * 5000

    async def _system_case(label: str, system_text: str) -> None:
        # One-off request that only varies the system prompt.
        print(f"{label}...", end=" ")
        try:
            await nous_client.chat.completions.create(
                model=MODEL,
                messages=[
                    {"role": "system", "content": system_text},
                    {"role": "user", "content": content}
                ],
                temperature=0.1,
                max_tokens=4000
            )
            print("✅ SUCCESS")
        except Exception:
            print("❌ FAILED")

    await _system_case("\nShort system prompt", short_system)
    await asyncio.sleep(0.5)
    await _system_case("Long system prompt", long_system)


if __name__ == "__main__":
    asyncio.run(main())

View file

@@ -1,109 +0,0 @@
#!/usr/bin/env python3
"""
Test to confirm: temperature < 0.3 causes failures on Nous API
"""
import asyncio
import os
from openai import AsyncOpenAI
from dotenv import load_dotenv

# Load NOUS_API_KEY from the environment / .env file.
load_dotenv()

# OpenAI-compatible async client pointed at the Nous inference endpoint.
nous_client = AsyncOpenAI(
    api_key=os.getenv("NOUS_API_KEY"),
    base_url="https://inference-api.nousresearch.com/v1"
)

# Model under test.
MODEL = "gemini-2.5-flash"
async def test_temp(temp_value):
    """Probe one temperature setting; return True when the request succeeds."""
    payload = "Test content. " * 1000  # 14k chars
    print(f"Testing temperature={temp_value}...", end=" ")
    try:
        await nous_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": "Summarize this content."},
                {"role": "user", "content": payload}
            ],
            temperature=temp_value,
            max_tokens=4000
        )
    except Exception:
        print("❌ FAILED")
        return False
    print("✅ SUCCESS")
    return True
async def main():
    """Sweep temperature values, then replicate the exact web_tools.py request."""
    print("Testing temperature threshold for Nous API...")
    print("="*60)
    for temp in (0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 1.0):
        await test_temp(temp)
        await asyncio.sleep(0.5)
    print("="*60)
    print("\nNow testing with ACTUAL web_tools.py content and parameters:")
    print("="*60)
    # Simulate the actual web_tools.py call
    system_prompt = """You are an expert content analyst. Your job is to process web content and create a comprehensive yet concise summary that preserves all important information while dramatically reducing bulk.
Create a well-structured markdown summary that includes:
1. Key excerpts (quotes, code snippets, important facts) in their original format
2. Comprehensive summary of all other important information
3. Proper markdown formatting with headers, bullets, and emphasis
Your goal is to preserve ALL important information while reducing length. Never lose key facts, figures, insights, or actionable information. Make it scannable and well-organized."""
    content = "Sample web page content. " * 3000  # ~75k chars like the real failures
    user_prompt = f"""Please process this web content and create a comprehensive markdown summary:
CONTENT TO PROCESS:
{content}
Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""

    async def _attempt(temperature: float) -> None:
        # Same request body both times; only temperature varies.
        try:
            await nous_client.chat.completions.create(
                model=MODEL,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=temperature,
                max_tokens=4000
            )
            print("✅ SUCCESS")
        except Exception:
            print("❌ FAILED")

    print(f"\nActual web_tools call (temp=0.1, {len(content):,} chars)...", end=" ")
    await _attempt(0.1)
    await asyncio.sleep(0.5)
    print(f"Same call but with temp=0.3...", end=" ")
    await _attempt(0.3)


if __name__ == "__main__":
    asyncio.run(main())