feat(api): implement dynamic max tokens handling for various providers
- Added the `_max_tokens_param` method to AIAgent to return the appropriate max-tokens parameter based on the provider (OpenAI vs. others).
- Updated API calls in AIAgent to use the new max-tokens handling.
- Introduced the `auxiliary_max_tokens_param` function in auxiliary_client for consistent max-tokens management across auxiliary clients.
- Refactored multiple tools to use `auxiliary_max_tokens_param` for improved compatibility with different models and providers.
This commit is contained in:
parent
f0458ebdb8
commit
58fce0a37b
7 changed files with 67 additions and 20 deletions
|
|
@ -812,10 +812,11 @@ def _extract_relevant_content(
|
|||
)
|
||||
|
||||
try:
|
||||
from agent.auxiliary_client import auxiliary_max_tokens_param
|
||||
response = _aux_vision_client.chat.completions.create(
|
||||
model=EXTRACTION_MODEL,
|
||||
messages=[{"role": "user", "content": extraction_prompt}],
|
||||
max_tokens=4000,
|
||||
**auxiliary_max_tokens_param(4000),
|
||||
temperature=0.1,
|
||||
)
|
||||
return response.choices[0].message.content
|
||||
|
|
@ -1283,6 +1284,7 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
|
|||
)
|
||||
|
||||
# Use the sync auxiliary vision client directly
|
||||
from agent.auxiliary_client import auxiliary_max_tokens_param
|
||||
response = _aux_vision_client.chat.completions.create(
|
||||
model=EXTRACTION_MODEL,
|
||||
messages=[
|
||||
|
|
@ -1294,7 +1296,7 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
|
|||
],
|
||||
}
|
||||
],
|
||||
max_tokens=2000,
|
||||
**auxiliary_max_tokens_param(2000),
|
||||
temperature=0.1,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -170,7 +170,7 @@ async def _summarize_session(
|
|||
max_retries = 3
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
from agent.auxiliary_client import get_auxiliary_extra_body
|
||||
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
|
||||
_extra = get_auxiliary_extra_body()
|
||||
response = await _async_aux_client.chat.completions.create(
|
||||
model=_SUMMARIZER_MODEL,
|
||||
|
|
@ -180,7 +180,7 @@ async def _summarize_session(
|
|||
],
|
||||
**({} if not _extra else {"extra_body": _extra}),
|
||||
temperature=0.1,
|
||||
max_tokens=MAX_SUMMARY_TOKENS,
|
||||
**auxiliary_max_tokens_param(MAX_SUMMARY_TOKENS),
|
||||
)
|
||||
return response.choices[0].message.content.strip()
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -314,13 +314,13 @@ async def vision_analyze_tool(
|
|||
logger.info("Processing image with %s...", model)
|
||||
|
||||
# Call the vision API
|
||||
from agent.auxiliary_client import get_auxiliary_extra_body
|
||||
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
|
||||
_extra = get_auxiliary_extra_body()
|
||||
response = await _aux_async_client.chat.completions.create(
|
||||
model=model,
|
||||
messages=messages,
|
||||
temperature=0.1,
|
||||
max_tokens=2000,
|
||||
**auxiliary_max_tokens_param(2000),
|
||||
**({} if not _extra else {"extra_body": _extra}),
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -242,7 +242,7 @@ Create a markdown summary that captures all key information in a well-organized,
|
|||
if _aux_async_client is None:
|
||||
logger.warning("No auxiliary model available for web content processing")
|
||||
return None
|
||||
from agent.auxiliary_client import get_auxiliary_extra_body
|
||||
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
|
||||
_extra = get_auxiliary_extra_body()
|
||||
response = await _aux_async_client.chat.completions.create(
|
||||
model=model,
|
||||
|
|
@ -251,7 +251,7 @@ Create a markdown summary that captures all key information in a well-organized,
|
|||
{"role": "user", "content": user_prompt}
|
||||
],
|
||||
temperature=0.1,
|
||||
max_tokens=max_tokens,
|
||||
**auxiliary_max_tokens_param(max_tokens),
|
||||
**({} if not _extra else {"extra_body": _extra}),
|
||||
)
|
||||
return response.choices[0].message.content.strip()
|
||||
|
|
@ -365,7 +365,7 @@ Create a single, unified markdown summary."""
|
|||
fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
|
||||
return fallback
|
||||
|
||||
from agent.auxiliary_client import get_auxiliary_extra_body
|
||||
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
|
||||
_extra = get_auxiliary_extra_body()
|
||||
response = await _aux_async_client.chat.completions.create(
|
||||
model=model,
|
||||
|
|
@ -374,7 +374,7 @@ Create a single, unified markdown summary."""
|
|||
{"role": "user", "content": synthesis_prompt}
|
||||
],
|
||||
temperature=0.1,
|
||||
max_tokens=4000,
|
||||
**auxiliary_max_tokens_param(4000),
|
||||
**({} if not _extra else {"extra_body": _extra}),
|
||||
)
|
||||
final_summary = response.choices[0].message.content.strip()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue