repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
Mirrored from https://github.com/yingqi-z20/Agent-libOS
stars
latest
clone command
git clone gitlawb://did:key:z6MkqRzA...RfoM/yingqi-z20-Agen...git clone gitlawb://did:key:z6MkqRzA.../yingqi-z20-Agen...d98dd2c9IPC1d ago| #1 | from __future__ import annotations |
| #2 | |
| #3 | import asyncio |
| #4 | import inspect |
| #5 | import os |
| #6 | from dataclasses import dataclass, field |
| #7 | from pathlib import Path |
| #8 | from typing import Any, Literal |
| #9 | |
| #10 | from agent_libos.config import DEFAULT_CONFIG |
| #11 | from agent_libos.models.exceptions import LibOSError |
| #12 | from agent_libos.utils.serde import to_jsonable |
| #13 | |
# Lower-cased strings that `_bool_env_value` accepts as True / False.
_TRUE_VALUES = {"1", "true", "yes", "on"}
_FALSE_VALUES = {"0", "false", "no", "off"}
# Values accepted for OPENAI_API_MODE (validated in `LLMClient.from_env`).
_API_MODES = {"auto", "responses", "chat"}
# LLM section of the project configuration; source of the defaults used below.
_LLM_DEFAULTS = DEFAULT_CONFIG.llm
| #18 | |
| #19 | |
class LLMError(LibOSError):
    """Raised by this module for LLM configuration and request failures."""
| #22 | |
| #23 | |
@dataclass
class LLMCompletion:
    """Normalized result of a single LLM request, independent of the API used."""

    # Text of the reply; may be empty when the model only produced tool calls.
    content: str
    # One dict per function call, with "id", "name" and "arguments" keys
    # (Responses results additionally carry "call_id").
    tool_calls: list[dict[str, Any]]
    # The SDK response object the completion was built from, if any.
    raw: Any | None = None
    # Which API produced the result: "responses" or "chat".
    api: str | None = None
    # The `id` attribute of the SDK response.
    response_id: str | None = None
    # The `_request_id` attribute of the SDK response.
    request_id: str | None = None
    # Model name reported by the response.
    model: str | None = None
    # Usage block of the response converted with `to_jsonable`; {} when absent.
    usage: dict[str, Any] = field(default_factory=dict)
    # Reasoning/thinking data extracted from the response, when present.
    reasoning: Any | None = None
| #35 | |
| #36 | |
@dataclass
class LLMClient:
    """Client for OpenAI-compatible LLM endpoints built on the OpenAI Python SDK.

    The ``acomplete*`` coroutines do the work; the synchronous methods wrap
    them with ``_run_sync``. ``api_mode`` selects the Responses API
    (``"responses"``), the Chat Completions API (``"chat"``), or ``"auto"``:
    Responses when no custom ``base_url`` is configured (or it points at
    OpenAI), with a fallback to Chat Completions when the failure looks like
    an unsupported endpoint.
    """

    # Endpoint override; None or blank means "use the SDK default".
    base_url: str | None = None
    # Model name sent with every request; required before any call.
    model: str | None = None
    # Kept out of repr() so the secret does not end up in logs or tracebacks.
    api_key: str | None = field(default=None, repr=False)
    # Passed to the SDK client as `timeout`.
    timeout: float = _LLM_DEFAULTS.timeout_s
    # Passed to the SDK client as `max_retries`.
    max_retries: int = _LLM_DEFAULTS.max_retries
    api_mode: Literal["auto", "responses", "chat"] = _LLM_DEFAULTS.api_mode
    # Sent as the `store` request parameter.
    store: bool = _LLM_DEFAULTS.store
    reasoning_effort: str | None = None
    verbosity: Literal["low", "medium", "high"] | None = None
    # Lazily created SDK clients (see `_client_or_raise` / `_async_client_or_raise`).
    _client: Any | None = field(default=None, init=False, repr=False)
    _async_client: Any | None = field(default=None, init=False, repr=False)

    @classmethod
    def from_env(cls, env_path: str | Path = ".env") -> "LLMClient":
        """Build a client from ``OPENAI_*`` environment variables.

        ``env_path`` is first loaded with ``load_dotenv``. Blank values are
        treated as unset, matching the other environment helpers.

        Raises:
            LLMError: if a variable holds an invalid value.
        """
        load_dotenv(env_path)
        # A blank OPENAI_API_MODE (common in .env templates) falls back to "auto".
        api_mode = (_optional_env("OPENAI_API_MODE") or "auto").lower()
        if api_mode not in _API_MODES:
            raise LLMError(f"OPENAI_API_MODE must be one of {sorted(_API_MODES)}, got {api_mode!r}")
        return cls(
            base_url=_optional_env("OPENAI_BASE_URL"),
            model=_optional_env("OPENAI_LANGUAGE_MODEL") or _optional_env("OPENAI_MODEL"),
            api_key=_optional_env("OPENAI_API_KEY"),
            timeout=_float_env("OPENAI_TIMEOUT", default=_LLM_DEFAULTS.timeout_s),
            max_retries=_int_env("OPENAI_MAX_RETRIES", default=_LLM_DEFAULTS.max_retries),
            api_mode=api_mode,  # type: ignore[arg-type]
            store=_bool_env("OPENAI_STORE", default=_LLM_DEFAULTS.store),
            reasoning_effort=_optional_env("OPENAI_REASONING_EFFORT"),
            verbosity=_verbosity_env("OPENAI_VERBOSITY"),
        )

    def complete(
        self,
        messages: list[dict[str, Any]],
        temperature: float = _LLM_DEFAULTS.temperature,
        max_tokens: int = _LLM_DEFAULTS.max_tokens,
        json_mode: bool = True,
    ) -> str:
        """Synchronous variant of ``acomplete``; returns only the reply text."""
        return self.complete_with_metadata(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            json_mode=json_mode,
        ).content

    def complete_with_metadata(
        self,
        messages: list[dict[str, Any]],
        temperature: float = _LLM_DEFAULTS.temperature,
        max_tokens: int = _LLM_DEFAULTS.max_tokens,
        json_mode: bool = True,
    ) -> LLMCompletion:
        """Synchronous variant of ``acomplete_with_metadata``.

        Raises:
            RuntimeError: when called from inside a running event loop.
        """
        return _run_sync(
            self.acomplete_with_metadata(
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                json_mode=json_mode,
            )
        )

    async def acomplete(
        self,
        messages: list[dict[str, Any]],
        temperature: float = _LLM_DEFAULTS.temperature,
        max_tokens: int = _LLM_DEFAULTS.max_tokens,
        json_mode: bool = True,
    ) -> str:
        """Request a completion without tools and return only the reply text."""
        completion = await self.acomplete_with_metadata(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            json_mode=json_mode,
        )
        return completion.content

    async def acomplete_with_metadata(
        self,
        messages: list[dict[str, Any]],
        temperature: float = _LLM_DEFAULTS.temperature,
        max_tokens: int = _LLM_DEFAULTS.max_tokens,
        json_mode: bool = True,
    ) -> LLMCompletion:
        """Request a completion without tools.

        With ``json_mode`` the configured JSON instruction is added to the
        messages unless some message already mentions JSON.

        Raises:
            LLMError: on request failure or when the reply text is empty.
        """
        selected_messages = self._messages_with_json_instruction(messages) if json_mode else messages
        completion = await self._complete_without_tools(
            messages=selected_messages,
            temperature=temperature,
            max_tokens=max_tokens,
            json_mode=json_mode,
        )
        if not completion.content:
            raise LLMError("LLM returned empty content")
        return completion

    def complete_action(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]],
        temperature: float = _LLM_DEFAULTS.temperature,
        max_tokens: int = _LLM_DEFAULTS.max_tokens,
    ) -> LLMCompletion:
        """Synchronous variant of ``acomplete_action``."""
        return _run_sync(
            self.acomplete_action(
                messages=messages,
                tools=tools,
                temperature=temperature,
                max_tokens=max_tokens,
            )
        )

    async def acomplete_action(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]],
        temperature: float = _LLM_DEFAULTS.temperature,
        max_tokens: int = _LLM_DEFAULTS.max_tokens,
    ) -> LLMCompletion:
        """Request a completion that may call one of ``tools`` (chat-style tool specs).

        Unlike ``acomplete_with_metadata`` an empty reply text is not an
        error here, since the result may consist of tool calls only.
        """
        if self._use_responses_api():
            try:
                return await self._responses_complete_action(messages, tools, temperature, max_tokens)
            except LLMError as exc:
                # Only "auto" mode may silently switch APIs, and only for
                # failures that look like an unsupported endpoint.
                if self.api_mode != "auto" or not _should_fallback_to_chat(exc.__cause__ or exc):
                    raise
        return await self._chat_complete_action(messages, tools, temperature, max_tokens)

    async def _complete_without_tools(
        self,
        messages: list[dict[str, Any]],
        temperature: float,
        max_tokens: int,
        json_mode: bool,
    ) -> LLMCompletion:
        """Dispatch a tool-less request to the Responses or Chat API (with auto fallback)."""
        if self._use_responses_api():
            try:
                return await self._responses_complete(messages, temperature, max_tokens, json_mode)
            except LLMError as exc:
                if self.api_mode != "auto" or not _should_fallback_to_chat(exc.__cause__ or exc):
                    raise
        return await self._chat_complete(messages, temperature, max_tokens, json_mode)

    async def _responses_complete(
        self,
        messages: list[dict[str, Any]],
        temperature: float,
        max_tokens: int,
        json_mode: bool,
    ) -> LLMCompletion:
        """Run a tool-less request through the Responses API."""
        payload = self._responses_payload(messages, temperature=temperature, max_tokens=max_tokens)
        if json_mode:
            # Replaces the verbosity-only text config with one that also sets the format.
            payload["text"] = self._text_config(json_mode=True)
        response = await self._create_response(payload)
        return self._completion_from_response(response)

    async def _responses_complete_action(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]],
        temperature: float,
        max_tokens: int,
    ) -> LLMCompletion:
        """Run a tool-calling request through the Responses API."""
        payload = self._responses_payload(messages, temperature=temperature, max_tokens=max_tokens)
        payload.update(
            {
                "tools": _responses_tools_from_chat_tools(tools),
                "tool_choice": "auto",
                # The libOS executor dispatches one selected action per quantum.
                "parallel_tool_calls": False,
            }
        )
        response = await self._create_response(payload)
        return self._completion_from_response(response)

    async def _chat_complete(
        self,
        messages: list[dict[str, Any]],
        temperature: float,
        max_tokens: int,
        json_mode: bool,
    ) -> LLMCompletion:
        """Run a tool-less request through the Chat Completions API.

        Raises:
            LLMError: when the reply text is still empty after the optional
                retry with thinking disabled.
        """
        payload = self._chat_payload(messages=messages, temperature=temperature, max_tokens=max_tokens)
        if json_mode:
            payload["response_format"] = {"type": "json_object"}
        completion = await self._create_chat_completion(payload)
        result = self._completion_from_chat(completion)
        if self._needs_non_thinking_retry(result):
            retry_payload = self._with_enable_thinking(payload, enabled=False)
            completion = await self._create_chat_completion(retry_payload)
            result = self._completion_from_chat(completion)
        if not result.content:
            finish_reason = _first_choice_attr(completion, "finish_reason")
            raise LLMError(f"LLM returned empty content; finish_reason={finish_reason!r}")
        return result

    async def _chat_complete_action(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]],
        temperature: float,
        max_tokens: int,
    ) -> LLMCompletion:
        """Run a tool-calling request through the Chat Completions API."""
        payload = self._chat_payload(messages=messages, temperature=temperature, max_tokens=max_tokens)
        payload.update({"tools": tools, "tool_choice": "auto", "parallel_tool_calls": False})
        try:
            completion = await self._create_chat_completion(payload)
        except LLMError as exc:
            # Preserve compatibility with OpenAI-compatible providers that do not
            # implement tool calling. The executor can still parse fallback JSON.
            message = str(exc).lower()
            if "tools" in message or "tool_choice" in message:
                text = await self.acomplete(messages, temperature=temperature, max_tokens=max_tokens, json_mode=False)
                return LLMCompletion(content=text, tool_calls=[], api="chat")
            raise
        result = self._completion_from_chat(completion)
        if self._needs_non_thinking_retry(result):
            completion = await self._create_chat_completion(self._with_enable_thinking(payload, enabled=False))
            result = self._completion_from_chat(completion)
        return result

    def _client_or_raise(self) -> Any:
        """Return the cached synchronous SDK client, creating it on first use.

        Raises:
            LLMError: if the SDK is missing or the configuration is incomplete.
        """
        if self._client is not None:
            return self._client
        try:
            from openai import OpenAI
        except ImportError as exc:
            raise LLMError("The OpenAI Python SDK is not installed. Install it with `pip install openai`.") from exc
        self._client = OpenAI(**self._client_kwargs())
        return self._client

    def _async_client_or_raise(self) -> Any:
        """Return the cached asynchronous SDK client, creating it on first use.

        Raises:
            LLMError: if the SDK is missing or the configuration is incomplete.
        """
        if self._async_client is not None:
            return self._async_client
        try:
            from openai import AsyncOpenAI
        except ImportError as exc:
            raise LLMError("The OpenAI Python SDK is not installed. Install it with `pip install openai`.") from exc
        self._async_client = AsyncOpenAI(**self._client_kwargs())
        return self._async_client

    def _client_kwargs(self) -> dict[str, Any]:
        """Validate the configuration and build the SDK client constructor kwargs."""
        if not self.model:
            raise LLMError("OPENAI_LANGUAGE_MODEL or OPENAI_MODEL is not configured")
        if not (self.api_key or os.getenv("OPENAI_API_KEY")):
            raise LLMError("OPENAI_API_KEY is not configured")

        kwargs: dict[str, Any] = {
            "timeout": self.timeout,
            # Let the SDK own transient network/rate-limit retry behavior.
            "max_retries": self.max_retries,
        }
        if self.api_key:
            kwargs["api_key"] = self.api_key
        if self.base_url:
            kwargs["base_url"] = self.base_url
        return kwargs

    def _responses_payload(self, messages: list[dict[str, Any]], temperature: float, max_tokens: int) -> dict[str, Any]:
        """Build the keyword arguments for a Responses API request."""
        if not self.model:
            raise LLMError("OPENAI_LANGUAGE_MODEL or OPENAI_MODEL is not configured")
        instructions, input_items = _messages_to_responses_parts(messages)
        payload: dict[str, Any] = {
            "model": self.model,
            "input": input_items,
            "max_output_tokens": max_tokens,
            "store": self.store,
            "truncation": "auto",
        }
        if instructions:
            payload["instructions"] = instructions
        if temperature is not None:
            payload["temperature"] = temperature
        if self.reasoning_effort:
            payload["reasoning"] = {"effort": self.reasoning_effort}
        text_config = self._text_config(json_mode=False)
        if text_config:
            payload["text"] = text_config
        extra_body = self._extra_body()
        if extra_body:
            payload["extra_body"] = extra_body
        return payload

    def _chat_payload(self, messages: list[dict[str, Any]], temperature: float, max_tokens: int) -> dict[str, Any]:
        """Build the keyword arguments for a Chat Completions request."""
        if not self.model:
            raise LLMError("OPENAI_LANGUAGE_MODEL or OPENAI_MODEL is not configured")

        payload: dict[str, Any] = {
            "model": self.model,
            "messages": messages,
            "temperature": temperature,
            "max_completion_tokens": max_tokens,
        }
        if self.store:
            payload["store"] = True
        if self.reasoning_effort:
            payload["reasoning_effort"] = self.reasoning_effort
        if self.verbosity:
            payload["verbosity"] = self.verbosity
        extra_body = self._extra_body()
        if extra_body:
            payload["extra_body"] = extra_body
        return payload

    async def _create_response(self, payload: dict[str, Any]) -> Any:
        """Send ``payload`` to the Responses API with compatibility retries."""
        client = self._async_client_or_raise()
        return await self._call_with_compatibility(client.responses.create, payload, api="responses")

    async def _create_chat_completion(self, payload: dict[str, Any]) -> Any:
        """Send ``payload`` to the Chat Completions API with compatibility retries."""
        client = self._async_client_or_raise()
        return await self._call_with_compatibility(client.chat.completions.create, payload, api="chat")

    async def _call_with_compatibility(self, create: Any, payload: dict[str, Any], api: str) -> Any:
        """Call ``create(**payload)``, adjusting parameters the provider rejects.

        After an SDK error, ``_compatibility_retry_payload`` may return an
        adjusted payload to retry with; the number of attempts is bounded by
        the configured ``compatibility_retry_attempts``. Exceptions that are
        not SDK errors propagate unchanged.

        Raises:
            LLMError: when no adjustment applies or the attempts are used up;
                the SDK error is attached as ``__cause__``.
        """
        request = dict(payload)
        last_error: Exception | None = None
        for _attempt in range(_LLM_DEFAULTS.compatibility_retry_attempts):
            try:
                return await create(**request)
            except Exception as exc:
                if not _is_openai_sdk_error(exc):
                    raise
                last_error = exc
                retry = self._compatibility_retry_payload(request, exc, api=api)
                if retry is None:
                    request_id = getattr(exc, "request_id", None)
                    status_code = getattr(exc, "status_code", None)
                    raise LLMError(
                        f"OpenAI SDK {api} request failed: status={status_code!r} request_id={request_id!r} error={exc}"
                    ) from exc
                request = retry
        raise LLMError(f"OpenAI SDK {api} request failed after compatibility retries: {last_error}") from last_error

    def _compatibility_retry_payload(self, payload: dict[str, Any], exc: Exception, api: str) -> dict[str, Any] | None:
        """Return a copy of ``payload`` adjusted for the parameter named in ``exc``.

        The lower-cased error text is searched for known parameter names; the
        first one that is also present in the payload is removed or renamed.
        Returns ``None`` when no rule applies. ``payload`` is never mutated.
        """
        message = str(exc).lower()
        retry = dict(payload)

        if "enable_thinking" in message and "extra_body" in retry:
            retry.pop("extra_body", None)
            return retry
        # Providers disagree on the token-limit parameter name; swap to the other one.
        if "max_completion_tokens" in message and "max_completion_tokens" in retry:
            retry["max_tokens"] = retry.pop("max_completion_tokens")
            return retry
        if "max_tokens" in message and "max_tokens" in retry:
            retry["max_completion_tokens"] = retry.pop("max_tokens")
            return retry
        if "max_output_tokens" in message and "max_output_tokens" in retry and api == "responses":
            retry.pop("max_output_tokens", None)
            return retry
        for key in ("parallel_tool_calls", "response_format", "temperature", "store", "reasoning", "reasoning_effort"):
            if key in message and key in retry:
                retry.pop(key, None)
                return retry
        if "verbosity" in message:
            if "verbosity" in retry:
                retry.pop("verbosity", None)
                return retry
            # Responses payloads carry verbosity inside the "text" config.
            text = retry.get("text")
            if isinstance(text, dict) and "verbosity" in text:
                updated_text = dict(text)
                updated_text.pop("verbosity", None)
                retry["text"] = updated_text
                return retry
        if ("text" in message or "json_schema" in message or "json_object" in message) and "text" in retry:
            retry.pop("text", None)
            return retry
        # "response_format" is already handled by the generic key loop above.
        return None

    def _completion_from_response(self, response: Any) -> LLMCompletion:
        """Convert a Responses API result into an ``LLMCompletion``.

        Raises:
            LLMError: if the response carries a non-``None`` ``error``.
        """
        error = getattr(response, "error", None)
        if error is not None:
            raise LLMError(f"OpenAI response failed: {error}")
        tool_calls: list[dict[str, Any]] = []
        for item in getattr(response, "output", None) or []:
            if _get_attr_or_key(item, "type") != "function_call":
                continue
            tool_calls.append(
                {
                    "id": _get_attr_or_key(item, "id") or _get_attr_or_key(item, "call_id"),
                    "call_id": _get_attr_or_key(item, "call_id"),
                    "name": _get_attr_or_key(item, "name"),
                    "arguments": _get_attr_or_key(item, "arguments") or "{}",
                }
            )
        return LLMCompletion(
            content=self._response_text(response),
            tool_calls=tool_calls,
            raw=response,
            api="responses",
            response_id=getattr(response, "id", None),
            request_id=getattr(response, "_request_id", None),
            model=str(getattr(response, "model", "")) or None,
            usage=_usage_from_response(response),
            reasoning=_reasoning_from_response(response),
        )

    def _completion_from_chat(self, completion: Any) -> LLMCompletion:
        """Convert a Chat Completions result into an ``LLMCompletion``.

        Raises:
            LLMError: if the result has no usable first choice.
        """
        try:
            message = completion.choices[0].message
        except (AttributeError, IndexError, TypeError) as exc:
            # TypeError covers a result whose `choices` is None.
            raise LLMError(f"unexpected LLM response shape: {completion}") from exc

        tool_calls: list[dict[str, Any]] = []
        for call in getattr(message, "tool_calls", None) or []:
            function = getattr(call, "function", None)
            if function is None:
                continue
            tool_calls.append(
                {
                    "id": getattr(call, "id", None),
                    "name": getattr(function, "name", None),
                    # Normalize missing/None/empty arguments like the Responses path does.
                    "arguments": getattr(function, "arguments", None) or "{}",
                }
            )

        return LLMCompletion(
            content=self._message_content(message),
            tool_calls=tool_calls,
            raw=completion,
            api="chat",
            response_id=getattr(completion, "id", None),
            request_id=getattr(completion, "_request_id", None),
            model=str(getattr(completion, "model", "")) or None,
            usage=_usage_from_response(completion),
            reasoning=_reasoning_from_chat_message(message),
        )

    def _targets_openai(self) -> bool:
        """Return True when no custom endpoint is set or it points at OpenAI.

        A blank ``base_url`` counts as unset, matching ``_client_kwargs``,
        which only forwards a non-empty value to the SDK.
        """
        return not self.base_url or _is_openai_base_url(self.base_url)

    def _use_responses_api(self) -> bool:
        """Decide whether a request should start with the Responses API."""
        if self.api_mode == "responses":
            return True
        if self.api_mode == "chat":
            return False
        return self._targets_openai()

    def _text_config(self, json_mode: bool) -> dict[str, Any]:
        """Build the Responses ``text`` config (JSON format and/or verbosity)."""
        config: dict[str, Any] = {}
        if json_mode:
            config["format"] = {"type": "json_object"}
        if self.verbosity:
            config["verbosity"] = self.verbosity
        return config

    def _extra_body(self) -> dict[str, Any]:
        """Return the ``extra_body`` derived from ``OPENAI_ENABLE_THINKING``.

        An unset or blank variable yields ``{}``.

        Raises:
            LLMError: if the variable holds a non-boolean value.
        """
        configured_thinking = _optional_env("OPENAI_ENABLE_THINKING")
        if configured_thinking is None:
            return {}
        return {"enable_thinking": _bool_env_value(configured_thinking)}

    def _needs_non_thinking_retry(self, completion: LLMCompletion) -> bool:
        """Tell whether an empty chat reply should be retried with thinking disabled.

        True only when the reply has neither tool calls nor non-blank text,
        ``OPENAI_ENABLE_THINKING`` is not configured, and a custom non-OpenAI
        endpoint is in use.
        """
        if completion.tool_calls or completion.content.strip():
            return False
        if _optional_env("OPENAI_ENABLE_THINKING") is not None:
            return False
        if self._targets_openai():
            return False
        return True

    @staticmethod
    def _with_enable_thinking(payload: dict[str, Any], enabled: bool) -> dict[str, Any]:
        """Return a copy of ``payload`` whose ``extra_body`` sets ``enable_thinking``."""
        retry = dict(payload)
        extra_body = dict(retry.get("extra_body") or {})
        extra_body["enable_thinking"] = enabled
        retry["extra_body"] = extra_body
        return retry

    @staticmethod
    def _response_text(response: Any) -> str:
        """Extract the reply text from a Responses API result.

        Uses ``output_text`` when it is a string; otherwise concatenates the
        ``output_text`` parts of all ``message`` output items.
        """
        output_text = getattr(response, "output_text", None)
        if isinstance(output_text, str):
            return output_text
        parts: list[str] = []
        for item in getattr(response, "output", None) or []:
            if _get_attr_or_key(item, "type") != "message":
                continue
            for content in _get_attr_or_key(item, "content") or []:
                if _get_attr_or_key(content, "type") == "output_text":
                    parts.append(str(_get_attr_or_key(content, "text") or ""))
        return "".join(parts)

    @staticmethod
    def _message_content(message: Any) -> str:
        """Return the text of a chat message whose content is a string, a list of parts, or None."""
        content = getattr(message, "content", None)
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            return "".join(_content_part_text(part) for part in content)
        return str(content)

    @staticmethod
    def _messages_with_json_instruction(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Ensure the conversation carries the configured JSON instruction.

        Returns ``messages`` itself when some message already mentions JSON.
        Otherwise returns shallow copies with the instruction appended to the
        first system/developer message, or prepended as a new system message
        when there is none.
        """
        if _messages_contain_json_instruction(messages):
            return messages
        new_messages = [dict(message) for message in messages]
        json_instruction = _LLM_DEFAULTS.json_instruction
        for msg in new_messages:
            if msg.get("role") in {"system", "developer"}:
                msg["content"] = str(msg.get("content", "")) + f" {json_instruction}"
                return new_messages
        return [{"role": "system", "content": json_instruction}] + new_messages
| #539 | |
| #540 | |
| #541 | def _responses_tools_from_chat_tools(tools: list[dict[str, Any]]) -> list[dict[str, Any]]: |
| #542 | converted: list[dict[str, Any]] = [] |
| #543 | for tool in tools: |
| #544 | if tool.get("type") != "function": |
| #545 | continue |
| #546 | function = tool.get("function") if isinstance(tool.get("function"), dict) else tool |
| #547 | name = function.get("name") if isinstance(function, dict) else None |
| #548 | if not name: |
| #549 | continue |
| #550 | converted.append( |
| #551 | { |
| #552 | "type": "function", |
| #553 | "name": name, |
| #554 | "description": function.get("description", ""), |
| #555 | "parameters": function.get("parameters") or {"type": "object", "properties": {}}, |
| #556 | # Existing Pydantic-generated schemas are not strict-mode |
| #557 | # normalized yet, so keep runtime compatibility while preserving |
| #558 | # JSON-schema argument guidance. |
| #559 | "strict": False, |
| #560 | } |
| #561 | ) |
| #562 | return converted |
| #563 | |
| #564 | |
def _messages_to_responses_parts(messages: list[dict[str, Any]]) -> tuple[str | None, list[dict[str, Any]]]:
    """Split chat messages into Responses ``instructions`` and ``input`` items.

    Non-empty system/developer messages are joined with blank lines into the
    instructions string (``None`` when there are none). Every other message
    becomes an input item; any role other than ``"assistant"`` is sent as
    ``"user"``.
    """
    system_chunks: list[str] = []
    conversation: list[dict[str, Any]] = []
    for entry in messages:
        speaker = str(entry.get("role", "user"))
        text = _message_content_for_search(entry)
        if speaker not in {"system", "developer"}:
            conversation.append(
                {
                    "role": speaker if speaker == "assistant" else "user",
                    "content": text,
                }
            )
        elif text:
            system_chunks.append(text)
    joined = "\n\n".join(system_chunks) if system_chunks else None
    return joined, conversation
| #582 | |
| #583 | |
| #584 | def _is_openai_sdk_error(exc: Exception) -> bool: |
| #585 | try: |
| #586 | from openai import OpenAIError |
| #587 | except ImportError: |
| #588 | return False |
| #589 | return isinstance(exc, OpenAIError) |
| #590 | |
| #591 | |
def _should_fallback_to_chat(exc: Exception) -> bool:
    """Decide whether a Responses failure looks like an unsupported endpoint.

    True when the exception's ``status_code`` is one of the configured
    fallback status codes, or when its lower-cased text contains one of the
    known "endpoint not available" fragments.
    """
    lowered = str(exc).lower()
    if getattr(exc, "status_code", None) in _LLM_DEFAULTS.fallback_status_codes:
        return True
    markers = (
        "responses",
        "unknown url",
        "unsupported endpoint",
        "not found",
        "invalid endpoint",
    )
    for marker in markers:
        if marker in lowered:
            return True
    return False
| #607 | |
| #608 | |
| #609 | def _is_openai_base_url(base_url: str) -> bool: |
| #610 | normalized = base_url.lower().rstrip("/") |
| #611 | return normalized in {"https://api.openai.com/v1", "https://api.openai.com"} or "api.openai.com" in normalized |
| #612 | |
| #613 | |
def _messages_contain_json_instruction(messages: list[dict[str, Any]]) -> bool:
    """Return True when any message text contains "json" (case-insensitive)."""
    for message in messages:
        if "json" in _message_content_for_search(message).lower():
            return True
    return False
| #616 | |
| #617 | |
| #618 | def _message_content_for_search(message: dict[str, Any]) -> str: |
| #619 | content = message.get("content") |
| #620 | if isinstance(content, str): |
| #621 | return content |
| #622 | if isinstance(content, list): |
| #623 | return " ".join(_content_part_text(part) for part in content) |
| #624 | return "" if content is None else str(content) |
| #625 | |
| #626 | |
| #627 | def _content_part_text(part: Any) -> str: |
| #628 | if isinstance(part, dict): |
| #629 | return str(part.get("text") or part.get("content") or "") |
| #630 | return str(getattr(part, "text", getattr(part, "content", part)) or "") |
| #631 | |
| #632 | |
| #633 | def _get_attr_or_key(value: Any, key: str) -> Any: |
| #634 | if isinstance(value, dict): |
| #635 | return value.get(key) |
| #636 | return getattr(value, key, None) |
| #637 | |
| #638 | |
| #639 | def _first_choice_attr(completion: Any, attr: str) -> Any: |
| #640 | try: |
| #641 | return getattr(completion.choices[0], attr, None) |
| #642 | except (AttributeError, IndexError): |
| #643 | return None |
| #644 | |
| #645 | |
def _usage_from_response(response: Any) -> dict[str, Any]:
    """Return the response's ``usage`` as a JSON-compatible dict.

    A missing usage yields ``{}``; a converted value that is not a dict is
    wrapped as ``{"raw": value}``.
    """
    usage = _get_attr_or_key(response, "usage")
    if usage is None:
        return {}
    converted = to_jsonable(usage)
    if isinstance(converted, dict):
        return converted
    return {"raw": converted}
| #652 | |
| #653 | |
def _reasoning_from_response(response: Any) -> Any | None:
    """Extract reasoning data from a Responses API result.

    Prefers the top-level ``reasoning`` value; otherwise collects every
    output item of type ``"reasoning"``. Returns ``None`` when neither exists.
    """
    top_level = _get_attr_or_key(response, "reasoning")
    if top_level is not None:
        return to_jsonable(top_level)
    collected = [
        to_jsonable(entry)
        for entry in _get_attr_or_key(response, "output") or []
        if _get_attr_or_key(entry, "type") == "reasoning"
    ]
    return collected or None
| #663 | |
| #664 | |
def _reasoning_from_chat_message(message: Any) -> Any | None:
    """Extract reasoning data from a chat message.

    Looks for the first non-empty value among the known reasoning field
    names, first on the message itself and then in its ``additional_kwargs``
    dict. Returns ``None`` when nothing is found.
    """
    field_names = ("reasoning", "reasoning_content", "thinking", "thinking_content")
    for name in field_names:
        found = _get_attr_or_key(message, name)
        if _has_value(found):
            return to_jsonable(found)
    extras = _get_attr_or_key(message, "additional_kwargs")
    if not isinstance(extras, dict):
        return None
    for name in field_names:
        found = extras.get(name)
        if _has_value(found):
            return to_jsonable(found)
    return None
| #677 | |
| #678 | |
| #679 | def _has_value(value: Any) -> bool: |
| #680 | return value is not None and value != "" |
| #681 | |
| #682 | |
def _bool_env_value(value: str) -> bool:
    """Parse a boolean from an environment-style string (case-insensitive).

    Raises:
        LLMError: if the text is neither a known true nor a known false value.
    """
    token = value.strip().lower()
    if token not in _TRUE_VALUES and token not in _FALSE_VALUES:
        raise LLMError(f"invalid boolean environment value: {value!r}")
    return token in _TRUE_VALUES
| #690 | |
| #691 | |
| #692 | def _bool_env(name: str, default: bool) -> bool: |
| #693 | value = os.getenv(name) |
| #694 | if value is None or not value.strip(): |
| #695 | return default |
| #696 | return _bool_env_value(value) |
| #697 | |
| #698 | |
| #699 | def _float_env(name: str, default: float) -> float: |
| #700 | value = os.getenv(name) |
| #701 | if value is None or not value.strip(): |
| #702 | return default |
| #703 | try: |
| #704 | return float(value) |
| #705 | except ValueError as exc: |
| #706 | raise LLMError(f"{name} must be a float, got {value!r}") from exc |
| #707 | |
| #708 | |
| #709 | def _int_env(name: str, default: int) -> int: |
| #710 | value = os.getenv(name) |
| #711 | if value is None or not value.strip(): |
| #712 | return default |
| #713 | try: |
| #714 | return int(value) |
| #715 | except ValueError as exc: |
| #716 | raise LLMError(f"{name} must be an integer, got {value!r}") from exc |
| #717 | |
| #718 | |
| #719 | def _optional_env(name: str) -> str | None: |
| #720 | value = os.getenv(name) |
| #721 | if value is None or not value.strip(): |
| #722 | return None |
| #723 | return value.strip() |
| #724 | |
| #725 | |
def _verbosity_env(name: str) -> Literal["low", "medium", "high"] | None:
    """Read environment variable ``name`` as a verbosity level (case-insensitive).

    Returns ``None`` when the variable is unset or blank.

    Raises:
        LLMError: if the value is not low, medium or high.
    """
    raw = _optional_env(name)
    if raw is None:
        return None
    level = raw.lower()
    if level in {"low", "medium", "high"}:
        return level  # type: ignore[return-value]
    raise LLMError(f"{name} must be one of low, medium, high; got {raw!r}")
| #734 | |
| #735 | |
| #736 | def _run_sync(awaitable: Any) -> Any: |
| #737 | try: |
| #738 | asyncio.get_running_loop() |
| #739 | except RuntimeError: |
| #740 | return asyncio.run(awaitable) |
| #741 | if inspect.iscoroutine(awaitable): |
| #742 | awaitable.close() |
| #743 | raise RuntimeError("Cannot use sync LLMClient APIs inside a running event loop. Use async APIs instead.") |
| #744 | |
| #745 | |
| #746 | def load_dotenv(path: str | Path = ".env") -> None: |
| #747 | env_path = Path(path) |
| #748 | if not env_path.exists(): |
| #749 | return |
| #750 | for raw_line in env_path.read_text(encoding="utf-8").splitlines(): |
| #751 | line = raw_line.strip() |
| #752 | if not line or line.startswith("#") or "=" not in line: |
| #753 | continue |
| #754 | if line.startswith("export "): |
| #755 | line = line[len("export ") :].strip() |
| #756 | key, value = line.split("=", 1) |
| #757 | key = key.strip() |
| #758 | value = value.strip().strip('"').strip("'") |
| #759 | if key and key not in os.environ: |
| #760 | os.environ[key] = value |
| #761 |