repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources 16d ago
/**
 * DeepSeek Inference Client
 *
 * Implements the InferenceClient interface using DeepSeek's OpenAI-compatible API.
 * Supports thinking mode via reasoning_effort and extra_body.
 * Replaces CLAWD Runtime's default inference when using DeepSeek models.
 */
/** Origin of the DeepSeek API; the client appends the endpoint path to it. */
export const DEEPSEEK_BASE_URL = "https://api.deepseek.com";

/** Model identifier the client uses by default. */
export const DEEPSEEK_MODEL_PRO = "deepseek-v4-pro";

/** Model identifier the client switches to in low-compute mode. */
export const DEEPSEEK_MODEL_FLASH = "deepseek-v4-flash";
/**
 * Create a DeepSeek inference client that conforms to the
 * InferenceClient interface used throughout the automaton.
 *
 * The returned client keeps two pieces of mutable state in its closure: the
 * model and the token limit applied when a call does not override them.
 * `setLowComputeMode` switches both together.
 *
 * @param {object} options
 * @param {string} options.apiKey - Sent as `Authorization: Bearer <apiKey>`.
 * @param {string} [options.baseUrl=DEEPSEEK_BASE_URL] - API origin; trailing
 *   slashes are stripped before the endpoint path is appended.
 * @param {string} [options.defaultModel=DEEPSEEK_MODEL_PRO] - Model used
 *   outside low-compute mode.
 * @param {number} [options.maxTokens=4096] - Token limit used outside
 *   low-compute mode.
 * @param {string} [options.flashModel=DEEPSEEK_MODEL_FLASH] - Model used while
 *   low-compute mode is enabled.
 * @returns {{chat: Function, setLowComputeMode: Function, getDefaultModel: Function}}
 *   `chat(messages, opts)` resolves to
 *   `{ id, model, message, toolCalls, usage, finishReason }`.
 *   NOTE: `options.proModel` may still be passed but is not read by this client.
 */
export function createDeepSeekInferenceClient(options) {
  const {
    apiKey,
    baseUrl = DEEPSEEK_BASE_URL,
    defaultModel = DEEPSEEK_MODEL_PRO,
    maxTokens: defaultMaxTokens = 4096,
    flashModel = DEEPSEEK_MODEL_FLASH,
  } = options;

  // Strip trailing slashes so "https://host/" does not become "https://host//v1/...".
  const endpoint = `${String(baseUrl).replace(/\/+$/, "")}/v1/chat/completions`;

  let currentModel = defaultModel;
  let maxTokens = defaultMaxTokens;

  /**
   * Send one non-streaming chat completion request.
   *
   * @param {Array<object>} messages - Chat history; each entry goes through `formatMessage`.
   * @param {object} [opts] - Per-call overrides: `model`, `maxTokens`, `temperature`, `tools`.
   * @returns {Promise<object>} Normalised completion (see factory docs).
   * @throws {Error} On a non-2xx response or when no usable choice is returned.
   */
  const chat = async (messages, opts) => {
    // `||` is deliberate: an empty model name or a 0 token limit falls back to the client state.
    const model = opts?.model || currentModel;
    const tokenLimit = opts?.maxTokens || maxTokens;

    const body = {
      model,
      messages: messages.map(formatMessage),
      max_tokens: tokenLimit,
      stream: false,
    };
    if (opts?.temperature !== undefined) {
      body.temperature = opts.temperature;
    }
    const tools = opts?.tools;
    if (tools && tools.length > 0) {
      body.tools = tools;
      body.tool_choice = "auto";
    }

    // DeepSeek thinking mode: enable reasoning with high effort.
    // `extra_body` is an OpenAI-SDK helper that merges its keys into the
    // request; over raw HTTP the `thinking` switch must be a top-level field.
    body.reasoning_effort = "high";
    body.thinking = { type: "enabled" };

    const resp = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify(body),
    });
    if (!resp.ok) {
      const text = await resp.text();
      throw new Error(`DeepSeek inference error: ${resp.status}: ${text}`);
    }

    const data = await resp.json();
    const choice = data.choices?.[0];
    const message = choice?.message;
    // A choice without a message is as unusable as no choice at all.
    if (!message) {
      throw new Error("No completion choice returned from DeepSeek inference");
    }

    const usage = {
      promptTokens: data.usage?.prompt_tokens || 0,
      completionTokens: data.usage?.completion_tokens || 0,
      totalTokens: data.usage?.total_tokens || 0,
    };

    // Keep only the fields of each tool call that downstream code relies on.
    const toolCalls = message.tool_calls?.map((tc) => ({
      id: tc.id,
      type: "function",
      function: {
        name: tc.function.name,
        arguments: tc.function.arguments,
      },
    }));

    // Prepend the reasoning trace (thinking mode) to the visible answer when present.
    const reasoningContent = message.reasoning_content || "";
    let content = message.content || "";
    if (reasoningContent) {
      content = `[Thinking]\n${reasoningContent}\n\n[Response]\n${content}`;
    }

    return {
      id: data.id || "",
      model: data.model || model,
      message: {
        role: message.role,
        content,
        tool_calls: toolCalls,
      },
      toolCalls,
      usage,
      finishReason: choice.finish_reason || "stop",
    };
  };

  /**
   * Toggle low-compute mode: the flash model with a 2048-token limit when
   * enabled, the configured default model and limit when disabled.
   *
   * @param {boolean} enabled
   */
  const setLowComputeMode = (enabled) => {
    if (enabled) {
      currentModel = flashModel;
      maxTokens = 2048;
    } else {
      currentModel = defaultModel;
      maxTokens = defaultMaxTokens;
    }
  };

  /** @returns {string} The model currently used when a call does not name one. */
  const getDefaultModel = () => currentModel;

  return {
    chat,
    setLowComputeMode,
    getDefaultModel,
  };
}
/**
 * Format a ChatMessage to the DeepSeek/OpenAI API format.
 *
 * `role` and `content` are always copied. `name`, `tool_calls` and
 * `tool_call_id` are copied only when truthy, so absent optional fields are
 * left off the request instead of being sent as empty values.
 *
 * @param {object} msg - Message with `role`, `content` and optional
 *   `name`, `tool_calls`, `tool_call_id`.
 * @returns {object} A new object; `msg` itself is not modified.
 */
function formatMessage(msg) {
  const formatted = {
    role: msg.role,
    content: msg.content,
  };
  if (msg.name) {
    formatted.name = msg.name;
  }
  if (msg.tool_calls) {
    formatted.tool_calls = msg.tool_calls;
  }
  if (msg.tool_call_id) {
    formatted.tool_call_id = msg.tool_call_id;
  }
  return formatted;
}
/**
 * DeepSeek cost estimation in cents per million tokens.
 *
 * Keyed by model identifier; each entry holds the `input` (prompt) and
 * `output` (completion) rate.
 */
export const DEEPSEEK_PRICING = {
  "deepseek-v4-pro": { input: 200, output: 800 },
  "deepseek-v4-flash": { input: 15, output: 60 },
};
/**
 * Estimate cost in cents for a DeepSeek inference call.
 *
 * Rates are read from `DEEPSEEK_PRICING` (cents per million tokens). A model
 * without its own entry is priced at the "deepseek-v4-pro" rate. The result
 * is rounded up to a whole cent, so any non-zero usage costs at least 1.
 *
 * @param {{promptTokens?: number, completionTokens?: number}} usage - Token
 *   counts; a missing count is treated as 0.
 * @param {string} model - Model identifier used to look up the rate.
 * @returns {number} Estimated cost in whole cents.
 */
export function estimateDeepSeekCostCents(usage, model) {
  // Own-property check: a plain `DEEPSEEK_PRICING[model]` lookup would resolve
  // names such as "constructor" through the prototype chain and yield NaN.
  const isKnownModel = Object.prototype.hasOwnProperty.call(DEEPSEEK_PRICING, model);
  const rates = isKnownModel ? DEEPSEEK_PRICING[model] : DEEPSEEK_PRICING["deepseek-v4-pro"];

  const inputCost = ((usage.promptTokens ?? 0) / 1_000_000) * rates.input;
  const outputCost = ((usage.completionTokens ?? 0) / 1_000_000) * rates.output;
  return Math.ceil(inputCost + outputCost);
}
| #143 | //# sourceMappingURL=deepseek-inference.js.map |