repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
Mirrored from https://github.com/yingqi-z20/Agent-libOS
stars
latest
clone command
git clone gitlawb://did:key:z6MkqRzA...RfoM/yingqi-z20-Agen...git clone gitlawb://did:key:z6MkqRzA.../yingqi-z20-Agen...d98dd2c9IPC1d ago| #1 | from __future__ import annotations |
| #2 | |
| #3 | import asyncio |
| #4 | import inspect |
| #5 | import json |
| #6 | import os |
| #7 | import re |
| #8 | import shutil |
| #9 | import subprocess |
| #10 | import tempfile |
| #11 | import textwrap |
| #12 | from collections.abc import Awaitable, Callable |
| #13 | from pathlib import Path |
| #14 | from typing import Any |
| #15 | |
| #16 | from agent_libos.config import DEFAULT_CONFIG |
| #17 | from agent_libos.models.exceptions import SandboxError |
| #18 | from agent_libos.models import ValidationResult |
| #19 | from agent_libos.utils.serde import to_jsonable |
| #20 | |
| #21 | _TOOL_DEFAULTS = DEFAULT_CONFIG.tools |
| #22 | |
| #23 | SyscallHandler = Callable[[str, dict[str, Any]], Any | Awaitable[Any]] |
| #24 | |
| #25 | |
class SandboxBackend:
    """Interface shared by tool sandbox implementations.

    Subclasses supply ``static_check``, ``arun_source`` and ``run_tests``;
    the base class only provides the synchronous ``run_source`` bridge and a
    minimal ``metadata_for_source``.
    """

    # Language tag reported by ``metadata_for_source``.
    language = "typescript"

    def static_check(self, source_code: str) -> ValidationResult:
        """Validate ``source_code`` without executing it (subclass hook)."""
        raise NotImplementedError

    async def arun_source(
        self,
        source_code: str,
        args: dict[str, Any],
        *,
        pid: str | None = None,
        syscall_handler: SyscallHandler | None = None,
        timeout: float | None = None,
    ) -> Any:
        """Execute ``source_code`` asynchronously (subclass hook)."""
        raise NotImplementedError

    def run_source(
        self,
        source_code: str,
        args: dict[str, Any],
        *,
        pid: str | None = None,
        syscall_handler: SyscallHandler | None = None,
        timeout: float | None = None,
    ) -> Any:
        """Blocking wrapper around ``arun_source``.

        Drives the coroutine with ``asyncio.run`` and returns its result.

        Raises:
            RuntimeError: when invoked from a thread that already has a
                running event loop; callers there must await
                ``arun_source`` instead.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # get_running_loop() raising means no loop is active here.
            loop_is_running = False
        else:
            loop_is_running = True

        if loop_is_running:
            raise RuntimeError("Cannot call run_source() inside a running event loop. Use await arun_source(...).")

        pending = self.arun_source(
            source_code,
            args,
            pid=pid,
            syscall_handler=syscall_handler,
            timeout=timeout,
        )
        return asyncio.run(pending)

    def run_tests(
        self,
        source_code: str,
        tests: list[dict[str, Any]],
        timeout: float | None = None,
    ) -> ValidationResult:
        """Run ``tests`` against ``source_code`` (subclass hook)."""
        raise NotImplementedError

    def metadata_for_source(self, source_code: str) -> dict[str, Any]:
        """Return descriptive metadata; the base class reports only the language."""
        return dict(language=self.language)
| #76 | |
| #77 | |
class DenoTypescriptSandbox(SandboxBackend):
    """Deno/TypeScript sandbox for Agent-authored tools.

    Candidate tools run as Deno userland programs without host permissions.
    Their only libOS access path is the NDJSON syscall protocol handled by the
    libOS runtime broker over stdin/stdout.
    """

    # Static ``import ... from "x"`` / ``export ... from "x"`` specifiers.
    _IMPORT_RE = re.compile(
        r"^\s*(?:import|export)\s+(?:[^\"']*?\s+from\s+)?[\"']([^\"']+)[\"']",
        re.MULTILINE,
    )
    # Bare side-effect imports such as ``import "x"``.
    _SIDE_EFFECT_IMPORT_RE = re.compile(r"^\s*import\s*[\"']([^\"']+)[\"']", re.MULTILINE)
    # The tool entry point: ``export [async] function run(``.
    _RUN_EXPORT_RE = re.compile(r"export\s+(?:async\s+)?function\s+run\s*\(")
    # Identifiers/calls rejected by static_check, keyed by the label used in
    # the error message.
    _DANGEROUS_PATTERNS = {
        "Deno": re.compile(r"(?<![\w$])Deno(?![\w$])"),
        "globalThis.Deno": re.compile(r"globalThis\s*\.\s*Deno"),
        "eval": re.compile(r"(?<![\w$])eval\s*\("),
        "Function": re.compile(r"(?<![\w$])Function\s*\("),
        "Worker": re.compile(r"(?<![\w$])Worker\s*\("),
        "WebAssembly": re.compile(r"(?<![\w$])WebAssembly(?![\w$])"),
    }

    def __init__(
        self,
        *,
        deno_executable: str = _TOOL_DEFAULTS.deno_executable,
        default_timeout_s: float = _TOOL_DEFAULTS.deno_timeout_s,
        max_rpc_calls: int = _TOOL_DEFAULTS.deno_max_rpc_calls,
        max_stdout_bytes: int = _TOOL_DEFAULTS.deno_max_stdout_bytes,
        max_stderr_bytes: int = _TOOL_DEFAULTS.deno_max_stderr_bytes,
        jsr_allowlist: tuple[str, ...] = _TOOL_DEFAULTS.deno_jsr_allowlist,
    ) -> None:
        """Store sandbox limits; every default comes from ``_TOOL_DEFAULTS``.

        Args:
            deno_executable: Executable name or path resolved by ``_resolve_deno``.
            default_timeout_s: Timeout applied when ``arun_source`` gets ``timeout=None``.
            max_rpc_calls: Syscall frames served per run before further ones are refused.
            max_stdout_bytes: Cap on total protocol bytes read from the tool's stdout.
            max_stderr_bytes: Cap on stderr bytes retained from the tool.
            jsr_allowlist: ``@scope/name`` JSR packages the tool may import.
        """
        self.deno_executable = deno_executable
        self.default_timeout_s = default_timeout_s
        self.max_rpc_calls = max_rpc_calls
        self.max_stdout_bytes = max_stdout_bytes
        self.max_stderr_bytes = max_stderr_bytes
        self.jsr_allowlist = tuple(jsr_allowlist)

    def static_check(self, source_code: str) -> ValidationResult:
        """Reject sources that lack ``run``, use banned APIs, or import outside the allowlist.

        Returns:
            A ``ValidationResult`` whose ``ok`` is true only when no error was recorded.
        """
        errors: list[str] = []
        warnings: list[str] = []
        if not self._RUN_EXPORT_RE.search(source_code):
            errors.append("TypeScript tool source must export function run(args, libos)")
        if re.search(r"\bimport\s*\(", source_code):
            errors.append("dynamic import() is not allowed")
        for label, pattern in self._DANGEROUS_PATTERNS.items():
            if pattern.search(source_code):
                errors.append(f"dangerous TypeScript API is not allowed: {label}")
        for specifier in self._extract_imports(source_code):
            package = self._jsr_package(specifier)
            if package is None:
                errors.append(f"import is not allowed: {specifier}")
                continue
            if package not in self.jsr_allowlist:
                errors.append(f"JSR package is not in allowlist: {package}")
        return ValidationResult(ok=not errors, errors=errors, warnings=warnings)

    async def arun_source(
        self,
        source_code: str,
        args: dict[str, Any],
        *,
        pid: str | None = None,
        syscall_handler: SyscallHandler | None = None,
        timeout: float | None = None,
    ) -> Any:
        """Run ``source_code`` in a fresh Deno process and return the tool's result value.

        The candidate and the protocol runner are written to a temporary
        directory, Deno is started there with ``--no-prompt`` and no
        permission flags, and the NDJSON protocol is served until a result
        frame arrives.

        Raises:
            SandboxError: static validation failed, Deno could not be found,
                or the tool/protocol failed.
            TimeoutError: the run exceeded ``timeout`` (or ``default_timeout_s``).
        """
        validation = self.static_check(source_code)
        if not validation.ok:
            raise SandboxError("; ".join(validation.errors))
        deno = self._resolve_deno()
        selected_timeout = self.default_timeout_s if timeout is None else timeout
        with tempfile.TemporaryDirectory(prefix="agent_libos_deno_tool_") as tmp:
            tmp_path = Path(tmp)
            (tmp_path / "candidate.ts").write_text(source_code, encoding="utf-8")
            (tmp_path / "runner.ts").write_text(self._runner_source(), encoding="utf-8")
            proc = await asyncio.create_subprocess_exec(
                deno,
                "run",
                "--no-prompt",
                "runner.ts",
                cwd=tmp,
                stdin=asyncio.subprocess.PIPE,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                # readline() fails once a single line exceeds the stream
                # limit (64 KiB by default), so size the limit from the
                # configured stdout budget instead of the asyncio default.
                limit=max(self.max_stdout_bytes, 2 ** 16),
            )
            try:
                return await asyncio.wait_for(
                    self._serve_process(proc, args, syscall_handler),
                    timeout=selected_timeout,
                )
            except (TimeoutError, asyncio.TimeoutError) as exc:
                # asyncio.TimeoutError is only an alias of the builtin on
                # newer interpreters, so both are named explicitly.
                raise TimeoutError(f"Deno JIT tool timed out after {selected_timeout}s") from exc
            finally:
                # Runs for errors, timeouts and task cancellation alike, so
                # the child never outlives its temporary directory.
                await self._kill_process(proc)

    def run_tests(
        self,
        source_code: str,
        tests: list[dict[str, Any]],
        timeout: float | None = None,
    ) -> ValidationResult:
        """Run each test case through ``run_source`` with a scripted syscall handler.

        A test may carry ``args``, an ``expected`` result and a list of
        ``syscalls`` the tool is expected to perform in order.

        Returns:
            The static-check result when validation fails, otherwise a
            ``ValidationResult`` with per-test errors and a newline-joined log.
        """
        validation = self.static_check(source_code)
        if not validation.ok:
            return validation
        try:
            version = self.deno_version()
        except SandboxError as exc:
            return ValidationResult(ok=False, errors=[str(exc)])
        errors: list[str] = []
        logs: list[str] = ["language=typescript", f"deno={version}"]
        for index, test in enumerate(tests, start=1):
            syscall_handler, assert_syscalls_consumed = self._test_syscall_handler(test, index)
            try:
                result = self.run_source(
                    source_code,
                    test.get("args", {}),
                    syscall_handler=syscall_handler,
                    timeout=timeout,
                )
                assert_syscalls_consumed()
            except Exception as exc:
                errors.append(f"test {index} failed to run: {exc}")
                continue
            logs.append(f"test {index} result: {result!r}")
            if "expected" in test and result != test["expected"]:
                errors.append(f"test {index} expected {test['expected']!r}, got {result!r}")
        return ValidationResult(ok=not errors, errors=errors, logs="\n".join(logs))

    def metadata_for_source(self, source_code: str) -> dict[str, Any]:
        """Describe the source: language, extracted imports, allowlist and Deno version.

        When the Deno version cannot be determined, the error text is stored
        under ``deno_version_error`` instead of raising.
        """
        metadata: dict[str, Any] = {
            "language": "typescript",
            "imports": self._extract_imports(source_code),
            "jsr_allowlist": list(self.jsr_allowlist),
        }
        try:
            metadata["deno_version"] = self.deno_version()
        except SandboxError as exc:
            metadata["deno_version_error"] = str(exc)
        return metadata

    def deno_version(self) -> str:
        """Return the first line printed by ``deno --version``.

        Falls back to the literal ``"deno"`` when the command prints nothing.

        Raises:
            SandboxError: the executable is missing, cannot be run, or exits non-zero.
        """
        deno = self._resolve_deno()
        try:
            proc = subprocess.run(
                [deno, "--version"],
                text=True,
                capture_output=True,
                timeout=min(self.default_timeout_s, 5.0),
            )
        except Exception as exc:
            raise SandboxError(f"failed to run Deno executable {deno!r}: {exc}") from exc
        if proc.returncode != 0:
            message = proc.stderr.strip() or proc.stdout.strip() or f"deno exited {proc.returncode}"
            raise SandboxError(message)
        return proc.stdout.strip().splitlines()[0] if proc.stdout.strip() else "deno"

    async def _serve_process(
        self,
        proc: asyncio.subprocess.Process,
        args: dict[str, Any],
        syscall_handler: SyscallHandler | None,
    ) -> Any:
        """Drive the NDJSON protocol with ``proc`` until it yields a result.

        Sends the ``run`` frame, answers ``syscall`` frames through
        ``syscall_handler`` and returns the ``value`` of the ``result`` frame.

        Raises:
            SandboxError: the process exits early, reports an error frame,
                emits non-protocol output, or exceeds the stdout/stderr caps.
        """
        if proc.stdin is None or proc.stdout is None or proc.stderr is None:
            raise SandboxError("Deno process was not created with stdio pipes")
        # Drain stderr concurrently: the runner redirects console.log there,
        # and an unread pipe would eventually block the tool.
        stderr_task = asyncio.create_task(self._drain_stderr(proc.stderr))
        try:
            await self._write_frame(proc, {"type": "run", "args": to_jsonable(args)})
            stdout_bytes = 0
            rpc_calls = 0
            while True:
                try:
                    line = await proc.stdout.readline()
                except ValueError as exc:
                    # readline() reports a line longer than the stream limit
                    # as ValueError.
                    raise SandboxError("Deno JIT stdout exceeded max bytes") from exc
                if not line:
                    stderr, _stderr_truncated = await self._finish_stderr(stderr_task)
                    code = await proc.wait()
                    raise SandboxError(stderr.strip() or f"Deno JIT tool exited before result: {code}")
                stdout_bytes += len(line)
                if stdout_bytes > self.max_stdout_bytes:
                    raise SandboxError("Deno JIT stdout exceeded max bytes")
                try:
                    frame = json.loads(line.decode("utf-8"))
                except ValueError as exc:
                    # Covers both invalid UTF-8 and invalid JSON.
                    raise SandboxError(f"Deno JIT produced non-protocol stdout: {line[:200]!r}") from exc
                if not isinstance(frame, dict):
                    # Valid JSON that is not an object is not a protocol frame.
                    raise SandboxError(f"Deno JIT produced non-protocol stdout: {line[:200]!r}")
                frame_type = frame.get("type")
                if frame_type == "syscall":
                    rpc_calls += 1
                    if rpc_calls > self.max_rpc_calls:
                        await self._write_frame(
                            proc,
                            {
                                "type": "syscall_result",
                                "id": frame.get("id"),
                                "ok": False,
                                "error": f"Deno JIT exceeded max_rpc_calls={self.max_rpc_calls}",
                            },
                        )
                        continue
                    await self._handle_syscall_frame(proc, frame, syscall_handler)
                    continue
                if frame_type == "result":
                    # The result frame is the protocol boundary for a successful
                    # tool call. Any remaining Deno event-loop handles belong to
                    # this transient tool process and must not delay lifecycle
                    # syscalls or scheduler progress.
                    value = frame.get("value")
                    await self._kill_process(proc)
                    stderr, stderr_truncated = await self._finish_stderr(stderr_task)
                    if stderr_truncated:
                        raise SandboxError("Deno JIT stderr exceeded max bytes")
                    return value
                if frame_type == "error":
                    stderr, _stderr_truncated = await self._finish_stderr(stderr_task)
                    message = str(frame.get("message") or stderr or "Deno JIT tool failed")
                    raise SandboxError(message)
                raise SandboxError(f"unknown Deno JIT protocol frame: {frame_type!r}")
        finally:
            # On early exits (protocol errors, cancellation) the reader task
            # is still pending; cancel it so it does not linger.
            if not stderr_task.done():
                stderr_task.cancel()

    async def _handle_syscall_frame(
        self,
        proc: asyncio.subprocess.Process,
        frame: dict[str, Any],
        syscall_handler: SyscallHandler | None,
    ) -> None:
        """Answer one ``syscall`` frame with a ``syscall_result`` frame.

        Handler exceptions are reported to the tool as ``ok: False`` rather
        than propagated; a missing handler is reported the same way.
        """
        frame_id = frame.get("id")
        if syscall_handler is None:
            await self._write_frame(
                proc,
                {"type": "syscall_result", "id": frame_id, "ok": False, "error": "libOS syscall handler is unavailable"},
            )
            return
        name = str(frame.get("name") or "")
        args = frame.get("args")
        if not isinstance(args, dict):
            args = {}
        try:
            result = syscall_handler(name, args)
            # Handlers may be sync or async.
            if inspect.isawaitable(result):
                result = await result
            await self._write_frame(
                proc,
                {"type": "syscall_result", "id": frame_id, "ok": True, "payload": to_jsonable(result)},
            )
        except Exception as exc:
            await self._write_frame(
                proc,
                {
                    "type": "syscall_result",
                    "id": frame_id,
                    "ok": False,
                    "error": str(exc),
                    "error_type": type(exc).__name__,
                },
            )

    async def _write_frame(self, proc: asyncio.subprocess.Process, frame: dict[str, Any]) -> None:
        """Serialize ``frame`` as one ASCII JSON line and flush it to the process stdin."""
        if proc.stdin is None:
            raise SandboxError("Deno process stdin is closed")
        proc.stdin.write((json.dumps(frame, ensure_ascii=True, default=str) + "\n").encode("utf-8"))
        await proc.stdin.drain()

    async def _drain_stderr(self, stream: asyncio.StreamReader) -> bytes:
        """Read ``stream`` to EOF, retaining at most ``max_stderr_bytes + 1`` bytes.

        The extra byte lets ``_finish_stderr`` detect truncation; anything
        beyond it is read and discarded so the writer is never blocked.
        """
        keep = self.max_stderr_bytes + 1
        captured = bytearray()
        while True:
            chunk = await stream.read(2 ** 16)
            if not chunk:
                return bytes(captured)
            room = keep - len(captured)
            if room > 0:
                captured += chunk[:room]

    async def _finish_stderr(self, stderr_task: asyncio.Task[bytes]) -> tuple[str, bool]:
        """Await the stderr reader and return ``(text, truncated)``.

        A failing reader is treated as empty, untruncated stderr.
        """
        try:
            data = await stderr_task
        except Exception:
            return "", False
        return data[: self.max_stderr_bytes].decode("utf-8", errors="replace"), len(data) > self.max_stderr_bytes

    async def _kill_process(self, proc: asyncio.subprocess.Process) -> None:
        """Kill ``proc`` and wait for it, unless it has already exited."""
        if proc.returncode is not None:
            return
        try:
            proc.kill()
        except ProcessLookupError:
            return
        await proc.wait()

    def _test_syscall_handler(self, test: dict[str, Any], index: int) -> tuple[SyscallHandler, Callable[[], None]]:
        """Build a scripted syscall handler for one test case.

        Returns:
            ``(handler, assert_consumed)``: the handler replays the test's
            ``syscalls`` list in order and raises ``SandboxError`` on any
            mismatch; ``assert_consumed`` raises if scripted calls remain.
        """
        expected = list(test.get("syscalls", []))

        async def handler(name: str, args: dict[str, Any]) -> Any:
            if not expected:
                raise SandboxError(f"test {index} did not expect syscall {name}")
            spec = expected.pop(0)
            expected_name = spec.get("name")
            if expected_name != name:
                raise SandboxError(f"test {index} expected syscall {expected_name}, got {name}")
            if "args" in spec and spec["args"] != args:
                raise SandboxError(f"test {index} syscall {name} expected args {spec['args']!r}, got {args!r}")
            if spec.get("ok", True) is False:
                raise SandboxError(str(spec.get("error", "mock syscall failed")))
            return spec.get("result", spec.get("payload"))

        def assert_consumed() -> None:
            if expected:
                missing = [str(spec.get("name", "<unnamed>")) for spec in expected]
                raise SandboxError(f"test {index} expected syscall(s) not performed: {missing}")

        return handler, assert_consumed

    def _extract_imports(self, source_code: str) -> list[str]:
        """Return the sorted, de-duplicated module specifiers found by the import regexes."""
        imports = [match.group(1) for match in self._IMPORT_RE.finditer(source_code)]
        imports.extend(match.group(1) for match in self._SIDE_EFFECT_IMPORT_RE.finditer(source_code))
        return sorted(set(imports))

    def _jsr_package(self, specifier: str) -> str | None:
        """Return ``@scope/name`` for a ``jsr:@scope/name[@version][/path]`` specifier.

        Returns ``None`` for anything that is not a scoped JSR specifier,
        including an empty scope (``jsr:@/name``) or an empty package name.
        """
        if not specifier.startswith("jsr:"):
            return None
        body = specifier[4:]
        if not body.startswith("@"):
            return None
        parts = body.split("/")
        if len(parts) < 2:
            return None
        scope = parts[0]
        name = parts[1].split("@", 1)[0]
        # scope always begins with "@", so a bare "@" is the empty-scope case.
        if len(scope) < 2 or not name:
            return None
        return f"{scope}/{name}"

    def _resolve_deno(self) -> str:
        """Resolve the configured Deno executable to a runnable path.

        A value containing a path separator is used as-is when it names an
        existing file; otherwise the value is looked up with ``shutil.which``.

        Raises:
            SandboxError: no executable could be found.
        """
        candidate = self.deno_executable
        if os.path.sep in candidate or (os.path.altsep and os.path.altsep in candidate):
            path = Path(candidate)
            # A directory is never a usable executable.
            if path.is_file():
                return str(path)
        resolved = shutil.which(candidate)
        if resolved is None:
            raise SandboxError(
                f"Deno executable not found: {candidate!r}. Install Deno or configure tools.deno_executable."
            )
        return resolved

    def _runner_source(self) -> str:
        """Return the TypeScript runner that bridges the candidate tool to the protocol.

        The runner imports ``run`` from ``./candidate.ts``, redirects
        ``console.log`` to stderr so stdout carries only protocol frames,
        reads the ``run`` frame, exposes ``libos.syscall`` to the tool and
        writes a final ``result`` or ``error`` frame.
        """
        return textwrap.dedent(
            """
            import { run } from "./candidate.ts";

            const decoder = new TextDecoder();
            const encoder = new TextEncoder();
            const stdout = Deno.stdout.writable.getWriter();
            const stdin = Deno.stdin.readable.getReader();
            let buffer = "";

            console.log = (...args: unknown[]) => console.error(...args);

            async function readFrame(): Promise<Record<string, unknown>> {
              while (true) {
                const newline = buffer.indexOf("\\n");
                if (newline >= 0) {
                  const line = buffer.slice(0, newline);
                  buffer = buffer.slice(newline + 1);
                  if (line.trim().length === 0) continue;
                  return JSON.parse(line);
                }
                const chunk = await stdin.read();
                if (chunk.done) throw new Error("stdin closed before protocol frame");
                buffer += decoder.decode(chunk.value, { stream: true });
              }
            }

            async function writeFrame(frame: Record<string, unknown>): Promise<void> {
              await stdout.write(encoder.encode(JSON.stringify(frame) + "\\n"));
            }

            const libos = {
              async syscall(name: string, args: Record<string, unknown> = {}): Promise<unknown> {
                const id = crypto.randomUUID();
                await writeFrame({ type: "syscall", id, name, args });
                while (true) {
                  const frame = await readFrame();
                  if (frame.type !== "syscall_result" || frame.id !== id) continue;
                  if (frame.ok) return frame.payload;
                  const error = new Error(String(frame.error ?? "libOS syscall failed"));
                  (error as Error & { details?: unknown }).details = frame;
                  throw error;
                }
              },
            };

            try {
              const frame = await readFrame();
              if (frame.type !== "run") throw new Error("first protocol frame must be run");
              const value = await run(frame.args ?? {}, libos);
              await writeFrame({ type: "result", value });
            } catch (error) {
              await writeFrame({
                type: "error",
                message: error instanceof Error ? error.message : String(error),
                stack: error instanceof Error ? error.stack : undefined,
              });
              Deno.exit(1);
            } finally {
              stdout.releaseLock();
              stdin.releaseLock();
            }
            """
        ).strip()
| #476 |