living-os-cockpit

repository

loading code, commits, and activity

repositories

loading repo index

#1	import { NextRequest, NextResponse } from 'next/server';
#2	import { authErrorResponse, getUserContext, AuthMismatchError, AuthRequiredError, type UserContext } from '@/lib/user-context';
#3	import {
#4	canRunPlan,
#5	canRunTool,
#6	classifyAgenticPlan,
#7	classifyToolIntent,
#8	confirmationMarkdown,
#9	executeToolCall,
#10	isApprovalText,
#11	resultMarkdown,
#12	sessionIdFromBody,
#13	storePendingPlan,
#14	storePendingTool,
#15	takePendingTool,
#16	} from '@/lib/agentic-tools';
#17
/** Next.js route segment config: this route runs on the Node.js runtime. */
export const runtime = 'nodejs';
/** Next.js route segment config: maximum execution time for this route (Next.js documents the unit as seconds). */
export const maxDuration = 300;
#20
/**
 * Two-tier delegation settings supplied by the client.
 * `executeChat` makes one daemon call per tier when this is present.
 */
type DelegationRequest = {
  /** Model name used for the first (spec-writing) daemon call. */
  router: string;
  /** Model name used for the second (deliverable-producing) daemon call. */
  executor: string;
};
#25
/**
 * JSON body accepted by the POST handler.
 * Every field is optional; `executeChat` supplies the defaults noted below.
 */
type ChatPayload = {
  /** Requested model name (defaults to 'deepseek-v4-flash'). */
  model?: string;
  /** Conversation turns; `content` may be a string or a list of content parts. */
  messages?: { role: string; content: unknown }[];
  /** Completion token budget (defaults to 8192). */
  max_tokens?: number;
  /** Sampling temperature (defaults to 1.0). */
  temperature?: number;
  /** When set, the request runs as a router call followed by an executor call. */
  delegation?: DelegationRequest | null;
  /** When true, the request is run as a background job and polled via GET. */
  async?: boolean;
};
#34
/** In-memory record of one background chat request. */
type ChatJob = {
  id: string;
  status: 'running' | 'complete' | 'failed';
  /** Creation time in epoch milliseconds; `pruneJobs` expires jobs by this value. */
  created: number;
  /** Identity key of the caller that created the job; only that caller may poll it. */
  owner?: string;
  result?: Record<string, unknown>;
  error?: Record<string, unknown>;
};

/**
 * Process-wide job map. It is stashed on `globalThis` so repeated module
 * evaluation in the same process reuses one map. The store is in memory only:
 * a job is visible only to the process that created it.
 */
const jobStore = (() => {
  const g = globalThis as typeof globalThis & { __aethonChatJobs?: Map<string, ChatJob> };
  if (!g.__aethonChatJobs) g.__aethonChatJobs = new Map<string, ChatJob>();
  return g.__aethonChatJobs;
})();

/** Stable per-user key used to tie a job to the caller that created it. */
function jobOwnerKey(ctx: UserContext): string {
  return String(ctx.canonicalMemberId ?? ctx.vaultUserKey ?? '');
}

/**
 * Poll a background chat job: `GET ?job_id=<id>`.
 *
 * The caller must be authenticated and must be the user who created the job.
 * A job owned by someone else is reported as `job_not_found`, so job ids
 * cannot be probed across accounts.
 *
 * @returns 400 when `job_id` is missing, 404 when the job is unknown or not
 *   owned by the caller, otherwise the job record (without the owner key).
 */
export async function GET(req: NextRequest) {
  try {
    const ctx = await getUserContext();

    const jobId = req.nextUrl.searchParams.get('job_id') ?? '';
    if (!jobId) return NextResponse.json({ error: 'job_id_required' }, { status: 400 });

    const job = jobStore.get(jobId);
    // Jobs stored before ownership was recorded have no owner and stay readable.
    const ownedByCaller = !!job && (job.owner === undefined || job.owner === jobOwnerKey(ctx));
    if (!job || !ownedByCaller) {
      return NextResponse.json({ error: 'job_not_found', job_id: jobId }, { status: 404 });
    }

    // Return the public fields only; the owner key never leaves the server.
    return NextResponse.json({
      id: job.id,
      status: job.status,
      created: job.created,
      result: job.result,
      error: job.error,
    });
  } catch (error: unknown) {
    if (error instanceof AuthRequiredError || error instanceof AuthMismatchError) {
      return authErrorResponse(error);
    }
    throw error;
  }
}

/**
 * Run a chat request.
 *
 * When the body sets `async`, or `shouldRunAsync` flags the model, the work is
 * started in the background and a `{ pending, job_id }` envelope is returned
 * at once; the client then polls GET. Otherwise the request is awaited and the
 * result returned directly (HTTP 502 when the result carries `error`).
 *
 * Auth failures are turned into `authErrorResponse`. Any other thrown error is
 * returned as an assistant-style completion with HTTP 200, so the message
 * arrives in the same shape as a normal reply.
 */
export async function POST(req: NextRequest) {
  try {
    const ctx = await getUserContext();
    const body = (await req.json()) as ChatPayload;

    if (body.async || shouldRunAsync(body)) {
      const jobId = `chatjob-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
      const job: ChatJob = {
        id: jobId,
        status: 'running',
        created: Date.now(),
        owner: jobOwnerKey(ctx),
      };
      jobStore.set(jobId, job);
      pruneJobs();

      // Fire-and-forget: the outcome is written onto the stored job record.
      void executeChat(ctx, body)
        .then((result) => {
          job.status = 'complete';
          job.result = result;
        })
        .catch((error: unknown) => {
          job.status = 'failed';
          job.error = {
            error: 'chat_job_failed',
            detail: String((error as { message?: unknown } | null | undefined)?.message ?? error),
          };
        });

      return NextResponse.json({
        pending: true,
        job_id: jobId,
        status: 'running',
        message: 'Aethon is still working. This long-running request is being processed outside the Cloudflare timeout window.',
      });
    }

    const result = await executeChat(ctx, body);
    return NextResponse.json(result, { status: result.error ? 502 : 200 });
  } catch (error: unknown) {
    if (!(error instanceof AuthRequiredError) && !(error instanceof AuthMismatchError)) {
      const reason = String((error as { message?: unknown } | null | undefined)?.message || error);
      return NextResponse.json(
        completion(`Aethon hit an execution error before the plan could finish: ${reason}`, { model: 'aethon-chief-of-staff', error: true }),
        { status: 200 },
      );
    }
    return authErrorResponse(error);
  }
}
#98
/**
 * Return a copy of `messages` with `instruction` folded into the final user turn.
 *
 * `callDaemon` forwards only the last user message (as the query) plus earlier
 * user/assistant turns, so an instruction sent as a `system` message would never
 * reach the model. When there is no user turn, the instruction becomes one.
 */
function withLeadingInstruction(
  messages: { role: string; content: unknown }[],
  instruction: string,
): { role: string; content: unknown }[] {
  let lastUserIndex = -1;
  for (let i = messages.length - 1; i >= 0; i -= 1) {
    if (messages[i]?.role === 'user') {
      lastUserIndex = i;
      break;
    }
  }
  if (lastUserIndex < 0) return [...messages, { role: 'user', content: instruction }];

  const userText = contentToText(messages[lastUserIndex].content);
  return [
    ...messages.slice(0, lastUserIndex),
    { role: 'user', content: `${instruction}\n\n---\nUser request:\n${userText}` },
    ...messages.slice(lastUserIndex + 1),
  ];
}

/**
 * Produce the chat result for one request.
 *
 * Order of precedence:
 * 1. an agentic reply from `maybeAgenticJson`, if it returns one;
 * 2. a single daemon call when no delegation is requested;
 * 3. otherwise a two-step pipeline: the router model writes a spec, then the
 *    executor model produces the deliverable from that spec.
 *
 * @returns A chat-completion-shaped object, or `{ error, detail }` when a
 *   delegation tier fails. The delegation result also carries a `delegation`
 *   summary with both model names, the router spec and per-tier usage.
 */
async function executeChat(ctx: UserContext, body: ChatPayload): Promise<Record<string, unknown>> {
  const {
    model = 'deepseek-v4-flash',
    messages = [],
    max_tokens = 8192,
    temperature = 1.0,
    delegation = null,
  } = body;

  const agentic = await maybeAgenticJson(ctx, body);
  if (agentic) return agentic;

  if (!delegation) {
    return await callDaemon(ctx, {
      model, messages, max_tokens, temperature,
    });
  }

  // The body is untrusted JSON; never spread a non-array.
  const turns = Array.isArray(messages) ? messages : [];

  const routerInstruction = `You are the routing layer for a delegation pipeline. The user's actual query will be answered by ${delegation.executor}, not you. Your job: read the user's input carefully, produce a structured spec that ${delegation.executor} can execute against. Include:
1. Restated objective (one sentence)
2. Key context the executor must know
3. Specific output format requested
4. Constraints (length, tone, audience)
5. If there are documents/PDFs in the input, summarize their key points so executor doesn't have to re-read
Output as concise markdown. Do NOT answer the user's question yourself.`;

  const routerJson = await callDaemon(ctx, {
    model: delegation.router,
    messages: withLeadingInstruction(turns, routerInstruction),
    max_tokens: 1500,
    temperature: 0.7,
  });

  if (routerJson.error) {
    return { error: 'Router (Tier 1) failed', detail: routerJson };
  }

  const spec = String((routerJson as any)?.choices?.[0]?.message?.content ?? '');

  const executorInstruction = `You are an execution layer. The user's query has been analyzed by ${delegation.router}. Follow this spec exactly:\n\n---\n${spec}\n---\n\nProduce the deliverable specified.`;

  // With a blank spec there is nothing to follow, so the executor gets the
  // conversation unchanged instead of an empty instruction block.
  const executorMessages = spec.trim()
    ? withLeadingInstruction(turns, executorInstruction)
    : turns;

  const execJson = await callDaemon(ctx, {
    model: delegation.executor,
    messages: executorMessages,
    max_tokens,
    temperature,
  });

  if (execJson.error) {
    return { error: 'Executor (Tier 2) failed', detail: execJson, spec };
  }

  return {
    ...execJson,
    delegation: {
      router: delegation.router,
      executor: delegation.executor,
      routerSpec: spec,
      routerUsage: (routerJson as any)?.usage ?? null,
      executorUsage: (execJson as any)?.usage ?? null,
    },
  };
}
#171
/**
 * Handle the request locally when the latest user message is a tool or plan
 * interaction, instead of sending it to the daemon.
 *
 * Checks run in this order:
 * 1. approval text: take the pending call for this session and execute it;
 * 2. a classified plan: permission check, then store it and ask for confirmation;
 * 3. a classified single tool: permission check, then either execute at once or
 *    store it and ask for confirmation, depending on `requiresConfirmation`.
 *
 * @returns A completion-shaped reply, or `null` when the message is none of
 *   the above and should go through normal inference.
 */
async function maybeAgenticJson(ctx: UserContext, body: ChatPayload): Promise<Record<string, unknown> | null> {
  const userText = lastUserMessage(body.messages);
  const sessionId = sessionIdFromBody(body);

  // 1. The user is approving something that was queued earlier.
  if (isApprovalText(userText)) {
    const queued = takePendingTool(ctx, sessionId);
    if (!queued) {
      return completion('I do not have a pending tool call in this chat. Send the action again and I will show a confirmation card.');
    }
    const outcome = await executeToolCall(ctx, queued);
    return completion(resultMarkdown(outcome), {
      model: 'aethon-agentic-tools',
      tool: queued.tool,
      audit_path: outcome.audit_path,
    });
  }

  // 2. A multi-step plan is always queued for confirmation.
  const plan = classifyAgenticPlan(userText);
  if (plan) {
    if (canRunPlan(ctx, plan)) {
      const queuedPlan = storePendingPlan(ctx, sessionId, plan);
      return completion(confirmationMarkdown(queuedPlan), {
        model: 'aethon-chief-of-staff',
        tool: 'agentic_plan',
        pending_tool_id: queuedPlan.id,
      });
    }
    return completion(`I cannot run this plan for this account. One or more steps are limited to King's admin identity or to a member's own vault.`);
  }

  // 3. A single tool call.
  const call = classifyToolIntent(userText);
  if (!call) return null;

  if (!canRunTool(ctx, call)) {
    return completion(`I cannot run ${call.tool} for this account. That action is limited to King's admin identity or to a member's own vault.`);
  }

  if (call.requiresConfirmation) {
    const queuedCall = storePendingTool(ctx, sessionId, call);
    return completion(confirmationMarkdown(queuedCall), {
      model: 'aethon-agentic-tools',
      tool: call.tool,
      pending_tool_id: queuedCall.id,
    });
  }

  const outcome = await executeToolCall(ctx, call);
  return completion(resultMarkdown(outcome), {
    model: 'aethon-agentic-tools',
    tool: call.tool,
    audit_path: outcome.audit_path,
  });
}
#201
/**
 * Wrap locally generated text in a chat-completion-shaped object.
 *
 * @param content Assistant message text.
 * @param extra Metadata returned verbatim under `aethon`; a truthy `extra.model`
 *   also becomes the top-level `model` (otherwise 'aethon-agentic-tools').
 * @returns A single-choice completion with `finish_reason` 'stop'.
 */
function completion(content: string, extra: Record<string, unknown> = {}) {
  const modelName = String(extra.model || 'aethon-agentic-tools');
  const onlyChoice = {
    index: 0,
    message: { role: 'assistant', content },
    finish_reason: 'stop',
  };

  return {
    id: `chatcmpl-aethon-tool-${Date.now()}`,
    object: 'chat.completion',
    // The completion format reports creation time in whole seconds.
    created: Math.floor(Date.now() / 1000),
    model: modelName,
    choices: [onlyChoice],
    aethon: extra,
  };
}
#216
/**
 * Translate a client-facing model name into the daemon endpoint name.
 *
 * Known aliases are rewritten; any other non-empty name passes through
 * unchanged; a missing or empty name falls back to 'deepseek-v4-flash'.
 */
function modelToEndpoint(model?: string) {
  // All three local names share one endpoint.
  if (
    model === 'openyourmind-qwen35' ||
    model === 'openyourmind-local' ||
    model === 'qwen3.6-local'
  ) {
    return 'openyourmind-local';
  }
  if (model === 'claude-opus-4-7') return 'opus-4-7';
  // 'kimi-k2-6' is served by the deepseek endpoint.
  if (model === 'kimi-k2-6') return 'deepseek-v4-flash';
  return model || 'deepseek-v4-flash';
}
#233
/**
 * Text of one content part: the part itself when it is a string, its `text`
 * field when it is an object that has one, otherwise an empty string.
 */
function contentPartToText(part: unknown): string {
  if (typeof part === 'string') return part;
  if (part && typeof part === 'object' && 'text' in part) {
    return String((part as { text?: unknown }).text ?? '');
  }
  return '';
}

/**
 * Flatten message content into plain text.
 *
 * - string: returned as is
 * - array of parts: the text of each part, empty parts dropped, joined by newlines
 * - single part object: its `text`, or '' when it has none (previously this
 *   produced the literal "[object Object]")
 * - null / undefined: ''
 * - any other primitive: its string form
 */
function contentToText(content: unknown): string {
  if (typeof content === 'string') return content;
  if (content == null) return '';
  if (Array.isArray(content)) {
    return content
      .map(contentPartToText)
      .filter(Boolean)
      .join('\n');
  }
  if (typeof content === 'object') return contentPartToText(content);
  return String(content);
}
#250
/**
 * Text of the most recent message whose role is 'user'.
 *
 * @returns The flattened content of that message, or '' when `messages` is not
 *   an array or contains no user message.
 */
function lastUserMessage(messages: ChatPayload['messages']): string {
  if (!Array.isArray(messages)) return '';
  // Search a reversed copy so the caller's array is left untouched.
  const latest = [...messages].reverse().find((entry) => entry?.role === 'user');
  return latest ? contentToText(latest.content) : '';
}
#260
/**
 * Build the prior-turn history that accompanies the current query.
 *
 * Takes everything before the last user message (or the whole list when there
 * is no user message), keeps only user/assistant turns, flattens each to text
 * capped at 3000 characters, drops blank turns, and returns the last 12.
 */
function conversationHistory(messages: ChatPayload['messages']) {
  const list = Array.isArray(messages) ? messages : [];
  const lastUserIndex = list.map((entry) => entry?.role).lastIndexOf('user');
  const earlier = lastUserIndex < 0 ? list : list.slice(0, lastUserIndex);

  const turns: { role: string; content: string }[] = [];
  for (const entry of earlier) {
    const role = entry?.role;
    if (role !== 'user' && role !== 'assistant') continue;
    const content = contentToText(entry.content).slice(0, 3000);
    if (!content.trim()) continue;
    turns.push({ role: String(role), content });
  }
  return turns.slice(-12);
}
#281
/**
 * Send one inference request to the Aethon daemon and reshape its reply as a
 * chat completion.
 *
 * The daemon receives the last user message as `query` and the earlier
 * user/assistant turns as `context.conversation_history`.
 * NOTE(review): messages with any other role (e.g. 'system') and
 * `payload.temperature` are not part of the request body built here, so they
 * never reach the daemon. Confirm whether that is intended.
 *
 * @returns A completion-shaped object on success, or
 *   `{ error, detail, status }` when the HTTP status is not OK or the parsed
 *   body carries an `error` field. A rejected `fetch` propagates to the caller.
 */
async function callDaemon(ctx: UserContext, payload: ChatPayload): Promise<Record<string, unknown>> {
  const endpoint = modelToEndpoint(payload.model);

  const requestBody = {
    query: lastUserMessage(payload.messages),
    user_name: ctx.displayName,
    context: {
      route_hint: 'web',
      vault_user_id: ctx.vaultUserKey,
      vault_top_k: 2,
      force_endpoint: endpoint,
      requested_model: payload.model ?? endpoint,
      timeout: 300,
      member_tier: ctx.tier,
      canonical_member_id: ctx.canonicalMemberId,
      max_tokens: payload.max_tokens ?? 8192,
      conversation_history: conversationHistory(payload.messages),
    },
  };

  const response = await fetch(`${ctx.aethonUrl}/inference`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(requestBody),
  });

  // The daemon's reply schema is not declared in this file, so it is read loosely.
  const data = (await responseJson(response)) as any;

  const failed = !response.ok || data.error;
  if (failed) {
    return { error: data.error ?? 'Daemon inference failed', detail: data, status: response.status };
  }

  // The reply text may arrive under any of these three keys.
  const text = String(data.content ?? data.response ?? data.text ?? '');
  const wasTruncated = Boolean(data.truncated);
  const servedBy = data.endpoint ?? endpoint;

  return {
    id: `chatcmpl-aethon-${Date.now()}`,
    object: 'chat.completion',
    created: Math.floor(Date.now() / 1000),
    model: String(servedBy),
    choices: [{
      index: 0,
      message: { role: 'assistant', content: text },
      finish_reason: String(data.finish_reason ?? (data.truncated ? 'length' : 'stop')),
    }],
    usage: data.usage ?? null,
    truncated: wasTruncated,
    truncation_notice: data.truncation_notice ?? '',
    aethon: {
      endpoint: servedBy,
      reason: data.reason ?? null,
      middleware: data.middleware ?? null,
      metering: data.metering ?? null,
      finish_reason: data.finish_reason ?? null,
      truncated: wasTruncated,
    },
  };
}
#338
/**
 * Read a fetch `Response` as a JSON object without throwing on a bad body.
 *
 * @returns The parsed object, or a structured error record:
 *   - `empty_response`: the body was empty or whitespace only
 *   - `non_json_response`: the body was not valid JSON (first 1000 chars in `raw`)
 *   - `non_object_response`: the JSON was valid but not an object (`null`, an
 *     array, a string, a number, ...); callers read fields off the result, so
 *     anything else would make them throw or misread it
 */
async function responseJson(response: Response): Promise<Record<string, unknown>> {
  const text = await response.text();
  if (!text.trim()) return { error: 'empty_response', status: response.status };

  let parsed: unknown;
  try {
    parsed = JSON.parse(text);
  } catch (error: unknown) {
    return {
      error: 'non_json_response',
      status: response.status,
      contentType: response.headers.get('content-type'),
      detail: String((error as { message?: unknown } | null | undefined)?.message ?? error),
      raw: text.slice(0, 1000),
    };
  }

  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return {
      error: 'non_object_response',
      status: response.status,
      contentType: response.headers.get('content-type'),
      raw: text.slice(0, 1000),
    };
  }
  return parsed as Record<string, unknown>;
}
#354
/**
 * Decide whether a request is forced into background-job mode.
 *
 * True when the requested model, the delegation router or the delegation
 * executor has a name containing 'opus' or 'sonnet'.
 */
function shouldRunAsync(body: ChatPayload): boolean {
  const isFlagged = (name?: string): boolean =>
    Boolean(name?.includes('opus') || name?.includes('sonnet'));

  return (
    isFlagged(body.model) ||
    isFlagged(body.delegation?.router) ||
    isFlagged(body.delegation?.executor)
  );
}
#361
/**
 * Drop every job created more than 30 minutes ago, whatever its status.
 * Deleting entries while iterating a `Map` is safe in JavaScript.
 */
function pruneJobs() {
  const maxAgeMs = 30 * 60 * 1000;
  const oldestAllowed = Date.now() - maxAgeMs;
  jobStore.forEach((job, id) => {
    if (job.created < oldestAllowed) jobStore.delete(id);
  });
}
#368

z6Mku78K/living-os-cockpit