my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

my-project-public — gitlawb

#1	/**
#2	* DeepSeek Inference Client
#3	*
#4	* Implements the InferenceClient interface using DeepSeek's OpenAI-compatible API.
#5	* Supports thinking mode via reasoning_effort and extra_body.
#6	* Replaces Conway's default inference when using DeepSeek models.
#7	*/
#8
#9	import type {
#10	InferenceClient,
#11	ChatMessage,
#12	InferenceOptions,
#13	InferenceResponse,
#14	InferenceToolCall,
#15	TokenUsage,
#16	InferenceToolDefinition,
#17	} from "../types.js";
#18
/** Base URL of the DeepSeek API, used when the caller does not supply one. */
export const DEEPSEEK_BASE_URL = "https://api.deepseek.com";

/** Model identifier of the "pro" tier; the client's default model. */
export const DEEPSEEK_MODEL_PRO = "deepseek-v4-pro";

/** Model identifier of the "flash" tier; the client's default low-compute model. */
export const DEEPSEEK_MODEL_FLASH = "deepseek-v4-flash";
#22
/**
 * Configuration accepted by `createDeepSeekInferenceClient`.
 * Only `apiKey` is required; every other field has a default in the factory.
 */
interface DeepSeekInferenceClientOptions {
  /** API key sent as the `Authorization: Bearer …` header on every request. */
  apiKey: string;
  /** API base URL; the factory falls back to `DEEPSEEK_BASE_URL`. */
  baseUrl?: string;
  /** Model used outside low-compute mode; falls back to `DEEPSEEK_MODEL_PRO`. */
  defaultModel?: string;
  /** Default completion token limit outside low-compute mode; falls back to 4096. */
  maxTokens?: number;
  /** Model switched to in low-compute mode; falls back to `DEEPSEEK_MODEL_FLASH`. */
  flashModel?: string;
  /**
   * "Pro" model identifier. NOTE(review): the factory accepts this option but
   * no request path reads it — confirm whether it is still needed.
   */
  proModel?: string;
}
#31
/**
 * Create a DeepSeek inference client that conforms to the
 * InferenceClient interface used throughout the automaton.
 *
 * The returned client keeps two pieces of mutable state — the current model
 * and the current token limit — which `setLowComputeMode` switches between
 * the configured defaults and the low-compute values (flash model, 2048 tokens).
 *
 * @param options - API key plus optional overrides for base URL, models and
 *   token limit.
 * @returns An object exposing `chat`, `setLowComputeMode` and `getDefaultModel`.
 */
export function createDeepSeekInferenceClient(
  options: DeepSeekInferenceClientOptions,
): InferenceClient {
  const {
    apiKey,
    baseUrl = DEEPSEEK_BASE_URL,
    defaultModel = DEEPSEEK_MODEL_PRO,
    maxTokens: defaultMaxTokens = 4096,
    flashModel = DEEPSEEK_MODEL_FLASH,
  } = options;

  // Strip trailing slashes so a configured "https://host/" does not produce
  // "https://host//v1/chat/completions".
  const endpoint = `${baseUrl.replace(/\/+$/, "")}/v1/chat/completions`;

  let currentModel = defaultModel;
  let maxTokens = defaultMaxTokens;

  /**
   * Send one non-streaming chat completion request.
   *
   * @param messages - Conversation history, converted with `formatMessage`.
   * @param opts - Per-call overrides (model, maxTokens, temperature, tools).
   * @returns The first completion choice, normalised to InferenceResponse.
   * @throws Error when the HTTP status is not OK, or when the response has no
   *   choice or the choice has no message.
   */
  const chat = async (
    messages: ChatMessage[],
    opts?: InferenceOptions,
  ): Promise<InferenceResponse> => {
    const model = opts?.model || currentModel;
    const tokenLimit = opts?.maxTokens || maxTokens;

    const body: Record<string, unknown> = {
      model,
      messages: messages.map(formatMessage),
      max_tokens: tokenLimit,
      stream: false,
    };

    if (opts?.temperature !== undefined) {
      body.temperature = opts.temperature;
    }

    const tools = opts?.tools;
    if (tools && tools.length > 0) {
      body.tools = tools;
      body.tool_choice = "auto";
    }

    // DeepSeek thinking mode: enable reasoning with high effort.
    // `extra_body` is an OpenAI SDK client-side option whose contents the SDK
    // merges into the request JSON. When calling the HTTP endpoint directly
    // the `thinking` object must be a top-level field of the body itself.
    body.reasoning_effort = "high";
    body.thinking = { type: "enabled" };

    const resp = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify(body),
    });

    if (!resp.ok) {
      const text = await resp.text();
      throw new Error(
        `DeepSeek inference error: ${resp.status}: ${text}`,
      );
    }

    const data = (await resp.json()) as any;
    const choice = data.choices?.[0];

    if (!choice) {
      throw new Error("No completion choice returned from DeepSeek inference");
    }

    const message = choice.message;
    if (!message) {
      // Fail with a descriptive error instead of a TypeError on property access.
      throw new Error("No message in completion choice returned from DeepSeek inference");
    }

    const usage: TokenUsage = {
      promptTokens: data.usage?.prompt_tokens || 0,
      completionTokens: data.usage?.completion_tokens || 0,
      totalTokens: data.usage?.total_tokens || 0,
    };

    // Extract reasoning_content if available (DeepSeek thinking mode).
    const reasoningContent = message.reasoning_content || "";

    const toolCalls: InferenceToolCall[] | undefined =
      message.tool_calls?.map((tc: any) => ({
        id: tc.id,
        type: "function" as const,
        function: {
          name: tc.function.name,
          arguments: tc.function.arguments,
        },
      }));

    // Prepend reasoning content to the message if present so callers that
    // only read `content` still see the model's thinking.
    let content = message.content || "";
    if (reasoningContent) {
      content = `[Thinking]\n${reasoningContent}\n\n[Response]\n${content}`;
    }

    return {
      id: data.id || "",
      model: data.model || model,
      message: {
        role: message.role,
        content,
        tool_calls: toolCalls,
      },
      toolCalls,
      usage,
      finishReason: choice.finish_reason || "stop",
    };
  };

  /**
   * Switch between normal and low-compute settings.
   * Enabled: flash model with a 2048-token limit. Disabled: the configured
   * default model and default token limit.
   */
  const setLowComputeMode = (enabled: boolean): void => {
    if (enabled) {
      currentModel = flashModel;
      maxTokens = 2048;
    } else {
      currentModel = defaultModel;
      maxTokens = defaultMaxTokens;
    }
  };

  /** Return the model currently used when a call does not specify one. */
  const getDefaultModel = (): string => {
    return currentModel;
  };

  return {
    chat,
    setLowComputeMode,
    getDefaultModel,
  };
}
#164
/**
 * Convert a ChatMessage into the plain object sent in the request's
 * `messages` array (DeepSeek/OpenAI chat format).
 *
 * `role` and `content` are always copied. `name`, `tool_calls` and
 * `tool_call_id` are included only when truthy, in that key order.
 */
function formatMessage(
  msg: ChatMessage,
): Record<string, unknown> {
  const { role, content, name, tool_calls, tool_call_id } = msg;

  return {
    role,
    content,
    ...(name ? { name } : {}),
    ...(tool_calls ? { tool_calls } : {}),
    ...(tool_call_id ? { tool_call_id } : {}),
  };
}
#182
/**
 * DeepSeek cost estimation in cents per million tokens.
 * Keys are model identifiers; `input` prices prompt tokens and `output`
 * prices completion tokens.
 */
export const DEEPSEEK_PRICING: Record<string, { input: number; output: number }> = {
  "deepseek-v4-pro": { input: 200, output: 800 },
  "deepseek-v4-flash": { input: 15, output: 60 },
};

/**
 * Estimate cost in cents for a DeepSeek inference call.
 *
 * @param usage - Prompt and completion token counts for the call.
 * @param model - Model identifier; models without an entry in
 *   `DEEPSEEK_PRICING` are priced at the "deepseek-v4-pro" rates.
 * @returns The cost in cents, rounded up to the next whole cent.
 */
export function estimateDeepSeekCostCents(
  usage: { promptTokens: number; completionTokens: number },
  model: string,
): number {
  // Only own keys count as pricing entries. A bare `DEEPSEEK_PRICING[model]`
  // would resolve names such as "toString" or "constructor" through the
  // prototype chain, skip the fallback and produce NaN.
  const hasEntry = Object.prototype.hasOwnProperty.call(DEEPSEEK_PRICING, model);
  const p = DEEPSEEK_PRICING[hasEntry ? model : "deepseek-v4-pro"];

  const inputCost = (usage.promptTokens / 1_000_000) * p.input;
  const outputCost = (usage.completionTokens / 1_000_000) * p.output;
  return Math.ceil(inputCost + outputCost);
}
#203

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public