repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...2fa351d6docs: add automaton and perps launch sources16d ago| #1 | /** |
| #2 | * DeepSeek Inference Client |
| #3 | * |
| #4 | * Implements the InferenceClient interface using DeepSeek's OpenAI-compatible API. |
| #5 | * Supports thinking mode via reasoning_effort and extra_body. |
| #6 | * Replaces Conway's default inference when using DeepSeek models. |
| #7 | */ |
| #8 | |
| #9 | import type { |
| #10 | InferenceClient, |
| #11 | ChatMessage, |
| #12 | InferenceOptions, |
| #13 | InferenceResponse, |
| #14 | InferenceToolCall, |
| #15 | TokenUsage, |
| #16 | InferenceToolDefinition, |
| #17 | } from "../types.js"; |
| #18 | |
/** Default API root for DeepSeek requests; used when no `baseUrl` option is given. */
export const DEEPSEEK_BASE_URL = "https://api.deepseek.com";

/** Model id used as the default for the `defaultModel` and `proModel` options. */
export const DEEPSEEK_MODEL_PRO = "deepseek-v4-pro";

/** Model id used as the default for the `flashModel` option. */
export const DEEPSEEK_MODEL_FLASH = "deepseek-v4-flash";
| #22 | |
/**
 * Configuration accepted by `createDeepSeekInferenceClient`.
 *
 * Only `apiKey` is required; the factory supplies a default for every other
 * field.
 */
interface DeepSeekInferenceClientOptions {
  /** Secret sent as the `Authorization: Bearer …` header on each request. */
  apiKey: string;

  /** API root the chat-completions path is appended to. Defaults to `DEEPSEEK_BASE_URL`. */
  baseUrl?: string;

  /** Model used when a call names none and low-compute mode is off. Defaults to `DEEPSEEK_MODEL_PRO`. */
  defaultModel?: string;

  /** Completion token limit used when a call supplies none. Defaults to 4096. */
  maxTokens?: number;

  /** Model switched to while low-compute mode is on. Defaults to `DEEPSEEK_MODEL_FLASH`. */
  flashModel?: string;

  /**
   * Id of the "pro" model. Defaults to `DEEPSEEK_MODEL_PRO`.
   * NOTE(review): the factory in this file never consults this value — confirm
   * whether it is still needed.
   */
  proModel?: string;
}
| #31 | |
/**
 * Create a DeepSeek inference client that conforms to the
 * InferenceClient interface used throughout the automaton.
 *
 * Each `chat` call issues one non-streaming POST to
 * `<baseUrl>/v1/chat/completions` with thinking mode requested. When the reply
 * carries `reasoning_content`, it is folded into the returned message content
 * as `[Thinking]\n…\n\n[Response]\n…`.
 *
 * The `proModel` option is accepted by the options type but is not consulted
 * here: leaving low-compute mode restores `defaultModel`.
 *
 * @param options - API key (required) plus optional endpoint, model and
 *   token-limit overrides.
 * @returns An object exposing `chat`, `setLowComputeMode` and `getDefaultModel`.
 */
export function createDeepSeekInferenceClient(
  options: DeepSeekInferenceClientOptions,
): InferenceClient {
  const {
    apiKey,
    baseUrl = DEEPSEEK_BASE_URL,
    defaultModel = DEEPSEEK_MODEL_PRO,
    maxTokens: defaultMaxTokens = 4096,
    flashModel = DEEPSEEK_MODEL_FLASH,
  } = options;

  // Tolerate trailing slashes on a caller-supplied base URL so the request
  // path never contains "//".
  const endpoint = `${baseUrl.replace(/\/+$/, "")}/v1/chat/completions`;

  // Mutable state toggled by setLowComputeMode.
  let currentModel = defaultModel;
  let maxTokens = defaultMaxTokens;

  /**
   * Send one chat-completion request and normalise the reply.
   *
   * @param messages - Conversation so far; each entry goes through `formatMessage`.
   * @param opts - Optional per-call model, token limit, temperature and tools.
   * @returns The first completion choice with its token usage.
   * @throws Error when the HTTP status is not OK, when the reply has no
   *   choice, or when the first choice has no message.
   */
  const chat = async (
    messages: ChatMessage[],
    opts?: InferenceOptions,
  ): Promise<InferenceResponse> => {
    // `||` is deliberate: an empty model string or a 0 token limit falls back
    // to the client-level setting instead of being sent to the API.
    const model = opts?.model || currentModel;
    const tokenLimit = opts?.maxTokens || maxTokens;

    const body: Record<string, unknown> = {
      model,
      messages: messages.map(formatMessage),
      max_tokens: tokenLimit,
      stream: false,
    };

    if (opts?.temperature !== undefined) {
      body.temperature = opts.temperature;
    }

    const tools = opts?.tools;
    if (tools && tools.length > 0) {
      body.tools = tools;
      body.tool_choice = "auto";
    }

    // DeepSeek thinking mode: enable reasoning with high effort.
    // `extra_body` is an OpenAI-SDK convenience that the SDK merges into the
    // request; over raw HTTP the switch has to be a top-level field.
    body.reasoning_effort = "high";
    body.thinking = { type: "enabled" };

    const resp = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify(body),
    });

    if (!resp.ok) {
      const text = await resp.text();
      throw new Error(
        `DeepSeek inference error: ${resp.status}: ${text}`,
      );
    }

    // The response shape is not declared in this file, so it is read loosely.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const data = (await resp.json()) as any;
    const choice = data.choices?.[0];

    if (!choice) {
      throw new Error("No completion choice returned from DeepSeek inference");
    }

    const message = choice.message;
    if (!message) {
      // Fail with a clear error instead of a TypeError on the reads below.
      throw new Error("No message in completion choice returned from DeepSeek inference");
    }

    const usage: TokenUsage = {
      promptTokens: data.usage?.prompt_tokens || 0,
      completionTokens: data.usage?.completion_tokens || 0,
      totalTokens: data.usage?.total_tokens || 0,
    };

    // Extract reasoning_content if available (DeepSeek thinking mode).
    const reasoningContent = message.reasoning_content || "";

    // Copy only the fields of each tool call that InferenceToolCall carries.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const toolCalls: InferenceToolCall[] | undefined = message.tool_calls?.map((tc: any) => ({
      id: tc.id,
      type: "function" as const,
      function: {
        name: tc.function.name,
        arguments: tc.function.arguments,
      },
    }));

    // Prepend reasoning content to the message if present.
    let content = message.content || "";
    if (reasoningContent) {
      content = `[Thinking]\n${reasoningContent}\n\n[Response]\n${content}`;
    }

    return {
      id: data.id || "",
      model: data.model || model,
      message: {
        role: message.role,
        content,
        tool_calls: toolCalls,
      },
      toolCalls,
      usage,
      finishReason: choice.finish_reason || "stop",
    };
  };

  /**
   * Switch between the flash model with a 2048-token limit (enabled) and the
   * configured default model and token limit (disabled).
   */
  const setLowComputeMode = (enabled: boolean): void => {
    if (enabled) {
      currentModel = flashModel;
      maxTokens = 2048;
    } else {
      currentModel = defaultModel;
      maxTokens = defaultMaxTokens;
    }
  };

  /** Return the model used by calls that do not name one. */
  const getDefaultModel = (): string => {
    return currentModel;
  };

  return {
    chat,
    setLowComputeMode,
    getDefaultModel,
  };
}
| #164 | |
/**
 * Format a ChatMessage to the DeepSeek/OpenAI API format.
 *
 * `role` and `content` are always copied. `name`, `tool_calls` and
 * `tool_call_id` are copied only when set, so absent fields never appear in
 * the serialised request.
 *
 * @param msg - Message to convert.
 * @returns A plain object ready to be placed in the request's `messages` array.
 */
function formatMessage(
  msg: ChatMessage,
): Record<string, unknown> {
  const formatted: Record<string, unknown> = {
    role: msg.role,
    content: msg.content,
  };

  if (msg.name) formatted.name = msg.name;

  // An empty array is truthy, but OpenAI-style chat endpoints reject
  // `tool_calls: []`, so an empty list is treated the same as "no tool calls".
  const toolCalls = msg.tool_calls;
  const isEmptyList = Array.isArray(toolCalls) && toolCalls.length === 0;
  if (toolCalls && !isEmptyList) formatted.tool_calls = toolCalls;

  if (msg.tool_call_id) formatted.tool_call_id = msg.tool_call_id;

  return formatted;
}
| #182 | |
/**
 * DeepSeek cost estimation in cents per million tokens.
 *
 * Keyed by model id; `input` prices prompt tokens and `output` prices
 * completion tokens.
 */
export const DEEPSEEK_PRICING: Record<string, { input: number; output: number }> = {
  "deepseek-v4-pro": { input: 200, output: 800 },
  "deepseek-v4-flash": { input: 15, output: 60 },
};

/**
 * Estimate cost in cents for a DeepSeek inference call.
 *
 * The result is rounded up to a whole cent. Models missing from
 * `DEEPSEEK_PRICING` are billed at the `deepseek-v4-pro` rate.
 *
 * @param usage - Prompt and completion token counts for the call.
 * @param model - Model id to look up in `DEEPSEEK_PRICING`.
 * @returns Estimated cost in whole cents.
 * @throws Error when neither the model nor the fallback entry is in the table.
 */
export function estimateDeepSeekCostCents(
  usage: { promptTokens: number; completionTokens: number },
  model: string,
): number {
  // Own-property check: a plain index lookup would also match inherited keys
  // such as "toString" or "constructor" and yield NaN.
  const isListed = Object.prototype.hasOwnProperty.call(DEEPSEEK_PRICING, model);
  const price = DEEPSEEK_PRICING[isListed ? model : "deepseek-v4-pro"];
  if (!price) {
    throw new Error(`No DeepSeek pricing available for model: ${model}`);
  }

  // Sum in "cents x tokens" first and divide once. Dividing each term first
  // introduces float error (70_000 / 1e6 * 200 === 14.000000000000002) that
  // Math.ceil then turns into an extra cent.
  const scaledCost =
    usage.promptTokens * price.input + usage.completionTokens * price.output;
  return Math.ceil(scaledCost / 1_000_000);
}
| #203 |