my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

#1	use anyhow::{Context, Result};
#2	use reqwest::Client;
#3	use serde::{Deserialize, Serialize};
#4	use std::path::{Path, PathBuf};
#5	use std::time::{Duration, Instant};
#6	use tokio::time::sleep;
#7
#8	const API_URL: &str = "https://api.mistral.ai/v1/chat/completions";
#9	const MODEL: &str = "labs-leanstral-2603";
#10	const TIMEOUT_SECS: u64 = 180;
#11	const MAX_RETRIES: u32 = 3;
#12	const BACKOFF_BASE_MS: u64 = 2000;
#13
#14	const SYSTEM_PROMPT: &str = r#"You are Leanstral, an expert Lean 4 proof engineer.
#15
#16	GOAL: Produce a single Lean 4 module that COMPILES under Lean 4.15 + Mathlib 4.15 with <5% sorry usage.
#17
#18	CRITICAL SYNTAX RULE - Parameter Naming Convention:
#19	ALL function parameters MUST be prefixed with `p_` to avoid conflicts with Lean reserved keywords.
#20
#21	Examples:
#22	WRONG: `def transfer (from to amount : Nat) := ...`
#23	RIGHT: `def transfer (p_from p_to p_amount : Nat) := ...`
#24
#25	WRONG: `def withdraw (s amt : Nat) := ...`
#26	RIGHT: `def withdraw (p_s p_amt : Nat) := ...`
#27
#28	WRONG: `def initialize := ...`
#29	RIGHT: `def initEscrow := ...` (for function names, use descriptive names like initEscrow, createEscrow, setupEscrow)
#30
#31	Always use this pattern:
#32	- Function parameters: `p_from`, `p_to`, `p_state`, `p_amount`, `p_key`
#33	- Function names: `initEscrow`, `cancelEscrow`, `executeExchange` (avoid `initialize`)
#34	- Structure fields: `isClosed`, `isActive`, `balance`, `authority` (no prefix needed for fields)
#35	- Local variables in proofs: can use any name, but `p_` prefix is safe if unsure
#36
#37	Hard constraints:
#38	1. Output exactly one Lean module in a SINGLE ```lean4 code block
#39	2. Do not output prose, explanations, headers, or multiple code blocks
#40	3. Each theorem/function/type appears exactly once
#41	4. Every theorem must include a complete proof body
#42	5. Aim for 0 `sorry`; absolute maximum is <5% of theorems using `sorry`
#43	6. Define every helper function, structure, theorem, and constant before first use
#44	7. Use only identifiers and constants that are defined in the file or are standard in Lean 4.15 / Mathlib 4.15
#45	8. Do NOT invent library constants or theorem names such as `Nat.le_max`, `Nat.add_le_max`, `u64_max`, etc. If you need a bound, define it explicitly in the file
#46	9. Do NOT use `_` placeholders in theorem statements or definitions when Lean must infer a witness or value
#47	10. In particular, do NOT write propositions like `f x = some _`
#48	11. Instead, use an explicit existential: `∃ y, f x = some y`
#49	12. Do NOT use `.get!` in theorem statements
#50	13. Avoid `.get!` in proofs unless a prior hypothesis has already rewritten the expression to `some v`
#51	14. Prefer theorem statements of the form `f s = some s' -> ...` or `∃ s', f s = some s' ∧ ...`
#52	15. Prefer simple, executable definitions and simple theorem statements that are actually provable
#53	16. If a property is too strong for the chosen model, simplify the model or theorem statement so it compiles and proves cleanly
#54	17. Prefer `simp`, `rfl`, `constructor`, `aesop`, `omega`, `cases`, `rcases`, and explicit rewriting over brittle tactics
#55	18. Do not call tactics that do not match the goal shape; for example, do not use `split_ifs` unless an `if` expression is present in the goal or local hypotheses
#56	19. Avoid dependent pattern matching unless necessary
#57	20. Prefer total functions returning `Option` for state transitions when success/failure matters
#58
#59	Modeling guidance:
#60	- Prefer a small abstract model over a realistic but fragile one
#61	- Use `Nat` for balances and account identifiers unless a stronger type is truly needed
#62	- For arithmetic safety, if you need a `u64` bound, define it explicitly, for example:
#63	`def U64_MAX : Nat := 2^64 - 1`
#64	- State transitions should be simple and deterministic
#65	- If proving full token conservation is difficult, define a simple total-balance helper and prove conservation by simplification
#66	- If proving successful execution properties, make success explicit with hypotheses like `h : exchange s taker = some s'`
#67
#68	CRITICAL - Using Support Libraries:
#69	If the user prompt provides a "Support API" section listing pre-imported types, functions, or lemmas:
#70	- You MUST use those definitions EXACTLY as provided
#71	- Do NOT redefine ANY type, function, or lemma listed in the Support API
#72	- Do NOT add fields to types defined in the Support API
#73	- Do NOT assume types have fields beyond what is documented in the Support API
#74	- Only define NEW helpers that are NOT in the Support API
#75	- Example: If Support API says "Account has fields: key, authority, balance, writable"
#76	then Account has ONLY those 4 fields. Do NOT access non-existent fields like "escrow_token_account"
#77	- Example: If Support API defines "findByKey : List Account -> Pubkey -> Option Account"
#78	then use it EXACTLY with those types. Do NOT pass an Account where a Pubkey is expected
#79	- When the user says "open QEDGen.Solana", all types from that module are available
#80	- Read the Support API documentation carefully and use types correctly
#81	- If you need a custom type with different fields, define it with a DIFFERENT name
#82
#83	Recommended structure:
#84	- imports
#85	- constants
#86	- structures / inductives
#87	- helper functions
#88	- transition functions
#89	- helper lemmas
#90	- final theorems
#91
#92	Pre-output checklist:
#93	- No Lean keywords used as identifiers
#94	- No `_` placeholders in theorem statements
#95	- No `.get!` in theorem statements
#96	- No invented constants or theorem names
#97	- Every referenced name is defined or imported
#98	- Every theorem is provable with the chosen model
#99	- Output is exactly one `lean4` code block
#100
#101	Example patterns you should imitate:
#102
#103	Example 1: simple structure and definitional proof with p_ prefix
#104	```lean4
#105	import Mathlib
#106
#107	structure State where
#108	balance : Nat
#109
#110	def credit (p_s : State) (p_amt : Nat) : State :=
#111	{ p_s with balance := p_s.balance + p_amt }
#112
#113	theorem credit_balance (p_s : State) (p_amt : Nat) :
#114	(credit p_s p_amt).balance = p_s.balance + p_amt := by
#115	simp [credit]
#116	```
#117
#118	Example 2: `Option` transition with explicit success witness and p_ prefix
#119	```lean4
#120	import Mathlib
#121
#122	structure State where
#123	balance : Nat
#124
#125	def withdraw (p_s : State) (p_amt : Nat) : Option State :=
#126	if h : p_amt <= p_s.balance then
#127	some { p_s with balance := p_s.balance - p_amt }
#128	else
#129	none
#130
#131	theorem withdraw_success_balance (p_s p_s' : State) (p_amt : Nat)
#132	(h : withdraw p_s p_amt = some p_s') :
#133	p_s'.balance = p_s.balance - p_amt := by
#134	simp [withdraw] at h
#135	split_ifs at h with hle
#136	· cases h
#137	simp
#138	· contradiction
#139	```
#140
#141	Example 3: success stated with an existential, not `some _`, with p_ prefix
#142	```lean4
#143	import Mathlib
#144
#145	structure State where
#146	openFlag : Bool
#147
#148	def closeState (p_s : State) : Option State :=
#149	some { p_s with openFlag := false }
#150
#151	theorem closeState_exists (p_s : State) :
#152	∃ s', closeState p_s = some s' := by
#153	refine ⟨{ p_s with openFlag := false }, ?_⟩
#154	simp [closeState]
#155	```
#156
#157	Example 4: record update pattern inside a larger state with p_ prefix
#158	```lean4
#159	import Mathlib
#160
#161	structure Escrow where
#162	isClosed : Bool
#163
#164	structure ProgramState where
#165	escrow : Escrow
#166	counter : Nat
#167
#168	def markClosed (p_s : ProgramState) : ProgramState :=
#169	{ p_s with escrow := { p_s.escrow with isClosed := true } }
#170
#171	theorem markClosed_closed (p_s : ProgramState) :
#172	(markClosed p_s).escrow.isClosed = true := by
#173	simp [markClosed]
#174	```
#175
#176	Example 5: conjunction proof with `constructor` and p_ prefix
#177	```lean4
#178	import Mathlib
#179
#180	theorem pairFacts (p_a p_b : Nat) :
#181	p_a = p_a ∧ p_b = p_b := by
#182	constructor
#183	· rfl
#184	· rfl
#185	```
#186
#187	Example 6: explicit arithmetic bound with a defined constant and p_ prefix
#188	```lean4
#189	import Mathlib
#190
#191	def U64_MAX : Nat := 2^64 - 1
#192
#193	theorem bounded_add_safe (p_x p_y : Nat)
#194	(hx : p_x <= U64_MAX) (hy : p_y <= U64_MAX) (hxy : p_x + p_y <= U64_MAX) :
#195	p_x + p_y <= U64_MAX := by
#196	omega
#197	```
#198
#199	Output requirements:
#200	- Output plain Lean code only, inside a single ```lean4 code block
#201	- Do NOT include any explanation before or after the code block
#202	- Do NOT emit duplicate declarations
#203	- Do NOT emit theorem stubs followed later by proofs
#204	- If a theorem is too difficult, simplify the theorem statement before writing the proof
#205	- Prefer compilable, modest theorems over ambitious but broken ones
#206
#207	When in doubt, choose the simplest model and the simplest theorem statement that still captures the requested property and compiles cleanly."#;
#208
#209	#[derive(Debug, Serialize)]
#210	struct ChatMessage {
#211	role: String,
#212	content: String,
#213	}
#214
#215	#[derive(Debug, Serialize)]
#216	struct ChatRequest {
#217	model: String,
#218	messages: Vec<ChatMessage>,
#219	temperature: f64,
#220	max_tokens: usize,
#221	}
#222
#223	#[derive(Debug, Deserialize)]
#224	struct ChatChoice {
#225	message: ChatMessageContent,
#226	finish_reason: String,
#227	}
#228
#229	#[derive(Debug, Deserialize)]
#230	struct ChatMessageContent {
#231	content: String,
#232	}
#233
#234	#[derive(Debug, Deserialize)]
#235	struct Usage {
#236	prompt_tokens: usize,
#237	completion_tokens: usize,
#238	total_tokens: usize,
#239	}
#240
#241	#[derive(Debug, Deserialize)]
#242	struct ChatResponse {
#243	choices: Vec<ChatChoice>,
#244	usage: Usage,
#245	}
#246
#247	#[derive(Debug, Clone, Serialize, Deserialize)]
#248	pub struct CompletionMetadata {
#249	pub index: usize,
#250	pub sorry_count: usize,
#251	pub elapsed_seconds: f64,
#252	pub prompt_tokens: usize,
#253	pub completion_tokens: usize,
#254	pub total_tokens: usize,
#255	pub finish_reason: String,
#256	pub build_status: BuildStatus,
#257	#[serde(skip_serializing_if = "Option::is_none")]
#258	pub build_log_path: Option<PathBuf>,
#259	}
#260
#261	#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
#262	#[serde(rename_all = "snake_case")]
#263	pub enum BuildStatus {
#264	NotRun,
#265	Success,
#266	Failed,
#267	Skipped,
#268	}
#269
#270	#[derive(Debug, Serialize, Deserialize)]
#271	pub struct QedgenMetadata {
#272	pub model: String,
#273	pub passes: usize,
#274	pub temperature: f64,
#275	pub max_tokens: usize,
#276	pub validate: bool,
#277	pub completions: Vec<CompletionMetadata>,
#278	pub best_completion_index: usize,
#279	pub best_sorry_count: usize,
#280	pub best_selection_reason: String,
#281	}
#282
#283	async fn call_mistral_api_with_system(
#284	client: &Client,
#285	prompt: &str,
#286	api_key: &str,
#287	temperature: f64,
#288	max_tokens: usize,
#289	system_prompt: &str,
#290	) -> Result<(String, f64, Usage, String)> {
#291	let request = ChatRequest {
#292	model: MODEL.to_string(),
#293	messages: vec![
#294	ChatMessage {
#295	role: "system".to_string(),
#296	content: system_prompt.to_string(),
#297	},
#298	ChatMessage {
#299	role: "user".to_string(),
#300	content: prompt.to_string(),
#301	},
#302	],
#303	temperature,
#304	max_tokens,
#305	};
#306
#307	for attempt in 0..MAX_RETRIES {
#308	let start = Instant::now();
#309	let response = client
#310	.post(API_URL)
#311	.header("Content-Type", "application/json")
#312	.header("Authorization", format!("Bearer {}", api_key))
#313	.json(&request)
#314	.timeout(Duration::from_secs(TIMEOUT_SECS))
#315	.send()
#316	.await;
#317
#318	let elapsed = start.elapsed().as_secs_f64();
#319
#320	match response {
#321	Ok(resp) => {
#322	let status = resp.status();
#323	if status.is_success() {
#324	let body: ChatResponse = resp.json().await?;
#325	let content = body
#326	.choices
#327	.first()
#328	.context("No choices in response")?
#329	.message
#330	.content
#331	.clone();
#332	let finish_reason = body
#333	.choices
#334	.first()
#335	.context("No choices in response")?
#336	.finish_reason
#337	.clone();
#338	return Ok((content, elapsed, body.usage, finish_reason));
#339	} else if status.as_u16() == 429 {
#340	let wait = BACKOFF_BASE_MS * 2_u64.pow(attempt);
#341	eprintln!(
#342	" Rate limited (429). Retrying in {}s... (attempt {}/{})",
#343	wait / 1000,
#344	attempt + 1,
#345	MAX_RETRIES
#346	);
#347	sleep(Duration::from_millis(wait)).await;
#348	continue;
#349	} else if status.as_u16() == 401 {
#350	anyhow::bail!("Invalid or missing MISTRAL_API_KEY. Get one at https://console.mistral.ai");
#351	} else if status.as_u16() == 403 {
#352	let error_body = resp.text().await.unwrap_or_default();
#353	if error_body.contains("labs_not_enabled") {
#354	anyhow::bail!("The Leanstral Labs model is not enabled for this Mistral organization.\nAsk an org admin to enable Labs models at https://admin.mistral.ai/plateforme/privacy and retry.");
#355	} else {
#356	anyhow::bail!("HTTP 403: {}", error_body);
#357	}
#358	} else {
#359	let error_body = resp.text().await.unwrap_or_default();
#360	eprintln!("ERROR: HTTP {}: {}", status, error_body);
#361	if attempt < MAX_RETRIES - 1 {
#362	sleep(Duration::from_millis(BACKOFF_BASE_MS * 2_u64.pow(attempt))).await;
#363	continue;
#364	}
#365	anyhow::bail!("HTTP {}: {}", status, error_body);
#366	}
#367	}
#368	Err(e) => {
#369	eprintln!("ERROR: {}", e);
#370	if attempt < MAX_RETRIES - 1 {
#371	sleep(Duration::from_millis(BACKOFF_BASE_MS * 2_u64.pow(attempt))).await;
#372	continue;
#373	}
#374	return Err(e.into());
#375	}
#376	}
#377	}
#378
#379	anyhow::bail!("All retries exhausted")
#380	}
#381
#382	async fn call_mistral_api(
#383	client: &Client,
#384	prompt: &str,
#385	api_key: &str,
#386	temperature: f64,
#387	max_tokens: usize,
#388	) -> Result<(String, f64, Usage, String)> {
#389	call_mistral_api_with_system(client, prompt, api_key, temperature, max_tokens, SYSTEM_PROMPT).await
#390	}
#391
#392	fn extract_lean_code(content: &str) -> String {
#393	// Extract code from ```lean or ```lean4 blocks
#394	// (?s) enables dotall mode so . matches newlines
#395	let re = regex::Regex::new(r"(?s)```lean4?\s\n(.?)```").unwrap();
#396	let mut extracted = Vec::new();
#397
#398	for cap in re.captures_iter(content) {
#399	if let Some(code) = cap.get(1) {
#400	extracted.push(code.as_str());
#401	}
#402	}
#403
#404	if !extracted.is_empty() {
#405	// If we have multiple blocks, try to deduplicate them
#406	if extracted.len() > 1 {
#407	deduplicate_lean_blocks(&extracted)
#408	} else {
#409	extracted[0].to_string()
#410	}
#411	} else {
#412	content.to_string()
#413	}
#414	}
#415
#416	fn deduplicate_lean_blocks(blocks: &[&str]) -> String {
#417	use std::collections::{HashMap, HashSet};
#418
#419	// Parse each block to find declarations (theorem, def, structure, inductive, etc.)
#420	let decl_pattern = regex::Regex::new(
#421	r"(?m)^(theorem\|def\|structure\|inductive\|class\|instance\|axiom\|lemma)\s+([a-zA-Z_][a-zA-Z0-9_']*)"
#422	).unwrap();
#423
#424	// Map declaration names to their best implementation
#425	let mut declarations: HashMap<String, (usize, &str, bool)> = HashMap::new();
#426	let mut imports = Vec::new();
#427	let mut seen_imports = HashSet::new();
#428
#429	for (block_idx, block) in blocks.iter().enumerate() {
#430	// Collect imports from all blocks
#431	for line in block.lines() {
#432	if line.trim().starts_with("import ") {
#433	let import_stmt = line.trim();
#434	if !seen_imports.contains(import_stmt) {
#435	imports.push(import_stmt);
#436	seen_imports.insert(import_stmt);
#437	}
#438	}
#439	}
#440
#441	// Find all declarations in this block
#442	for cap in decl_pattern.captures_iter(block) {
#443	if let (Some(_kind), Some(name_match)) = (cap.get(1), cap.get(2)) {
#444	let name = name_match.as_str().to_string();
#445	let decl_start = cap.get(0).unwrap().start();
#446
#447	// Find the end of this declaration (next declaration or end of block)
#448	let next_decl = decl_pattern.find_at(block, decl_start + 1);
#449	let decl_end = next_decl.map(\|m\| m.start()).unwrap_or(block.len());
#450	let decl_text = &block[decl_start..decl_end];
#451
#452	// Determine if this has a real implementation
#453	// A stub typically has `:= by` followed by nothing or just whitespace
#454	let has_implementation = !is_stub(decl_text);
#455
#456	// Keep the declaration with implementation, or the latest one if both are stubs
#457	if let Some((existing_idx, _existing_text, existing_has_impl)) = declarations.get(&name) {
#458	// Prefer the one with implementation
#459	if has_implementation && !existing_has_impl {
#460	declarations.insert(name, (block_idx, decl_text, has_implementation));
#461	} else if !has_implementation && *existing_has_impl {
#462	// Keep existing
#463	} else {
#464	// Both have impl or both are stubs, keep the later one
#465	if block_idx > *existing_idx {
#466	declarations.insert(name, (block_idx, decl_text, has_implementation));
#467	}
#468	}
#469	} else {
#470	declarations.insert(name, (block_idx, decl_text, has_implementation));
#471	}
#472	}
#473	}
#474	}
#475
#476	// If deduplication didn't help much, just join blocks
#477	if declarations.is_empty() {
#478	return blocks.join("\n\n");
#479	}
#480
#481	// Reconstruct the code with deduplicated declarations
#482	let mut result = String::new();
#483
#484	// Add imports first
#485	if !imports.is_empty() {
#486	result.push_str(&imports.join("\n"));
#487	result.push_str("\n\n");
#488	}
#489
#490	// Add all declarations in a reasonable order (by block index, then position)
#491	let mut sorted_decls: Vec<_> = declarations.values().collect();
#492	sorted_decls.sort_by_key(\|(block_idx, _, _)\| *block_idx);
#493
#494	for (_, decl_text, _) in sorted_decls {
#495	result.push_str(decl_text);
#496	result.push_str("\n\n");
#497	}
#498
#499	result.trim_end().to_string()
#500	}
#501
#502	fn is_stub(decl_text: &str) -> bool {
#503	// Check if this is a stub declaration (has := by but no proof body)
#504	if let Some(by_pos) = decl_text.find(":= by") {
#505	let after_by = &decl_text[by_pos + 5..].trim();
#506	// If there's nothing after `:= by` or just whitespace/comments, it's a stub
#507	let meaningful_content = after_by.lines()
#508	.map(\|l\| l.trim())
#509	.filter(\|l\| !l.is_empty() && !l.starts_with("--"))
#510	.collect::<Vec<_>>();
#511	meaningful_content.is_empty()
#512	} else {
#513	false
#514	}
#515	}
#516
#517	fn normalize_lean_code(code: &str) -> String {
#518	let lines: Vec<&str> = code.lines().collect();
#519	let mut normalized_imports = Vec::new();
#520	let mut body_lines = Vec::new();
#521	let mut saw_mathlib_import = false;
#522
#523	let import_re = regex::Regex::new(r"^import\s+Mathlib(\..+)?\s*$").unwrap();
#524	let import_general_re = regex::Regex::new(r"^import\s+").unwrap();
#525
#526	for line in lines {
#527	if import_re.is_match(line) {
#528	saw_mathlib_import = true;
#529	continue;
#530	}
#531	if import_general_re.is_match(line) {
#532	normalized_imports.push(line);
#533	continue;
#534	}
#535	body_lines.push(line);
#536	}
#537
#538	let mut import_block = Vec::new();
#539	// Always add Mathlib.Tactic import for tactics like split_ifs
#540	if !saw_mathlib_import {
#541	import_block.push("import Mathlib.Tactic");
#542	} else {
#543	import_block.push("import Mathlib");
#544	}
#545	import_block.extend(normalized_imports);
#546
#547	let trimmed_body = body_lines.join("\n").trim_start().to_string();
#548	format!("{}\n\n{}\n", import_block.join("\n"), trimmed_body).trim_end().to_string() + "\n"
#549	}
#550
#551	fn count_sorry(code: &str) -> usize {
#552	let re = regex::Regex::new(r"\bsorry\b").unwrap();
#553	re.find_iter(code).count()
#554	}
#555
#556	pub async fn generate_proofs(
#557	prompt: &str,
#558	output_dir: &Path,
#559	passes: usize,
#560	temperature: f64,
#561	max_tokens: usize,
#562	validate: bool,
#563	validation_workspace: Option<&Path>,
#564	) -> Result<()> {
#565	let api_key = std::env::var("MISTRAL_API_KEY")
#566	.context("MISTRAL_API_KEY environment variable not set.\nGet a free key at https://console.mistral.ai\nThen run: export MISTRAL_API_KEY=your_key_here")?;
#567
#568	// Create output directories
#569	std::fs::create_dir_all(output_dir)?;
#570	let attempts_dir = output_dir.join("attempts");
#571	std::fs::create_dir_all(&attempts_dir)?;
#572
#573	// Set up Lean project files
#574	crate::project::setup_lean_project(output_dir)?;
#575
#576	// Save the prompt
#577	std::fs::write(output_dir.join("prompt.txt"), prompt)?;
#578
#579	eprintln!("Calling Leanstral model ({}) with pass@{}...", MODEL, passes);
#580
#581	let client = Client::new();
#582	let mut metadata = QedgenMetadata {
#583	model: MODEL.to_string(),
#584	passes,
#585	temperature,
#586	max_tokens,
#587	validate,
#588	completions: Vec::new(),
#589	best_completion_index: 0,
#590	best_sorry_count: usize::MAX,
#591	best_selection_reason: "fewest_sorry".to_string(),
#592	};
#593
#594	let mut best_idx = 0;
#595	let mut best_sorry_count = usize::MAX;
#596
#597	for i in 0..passes {
#598	eprint!(" Pass {}/{}... ", i + 1, passes);
#599	let (content, elapsed, usage, finish_reason) =
#600	call_mistral_api(&client, prompt, &api_key, temperature, max_tokens).await?;
#601
#602	let lean_code = normalize_lean_code(&extract_lean_code(&content));
#603	let sorry_count = count_sorry(&lean_code);
#604
#605	eprintln!(
#606	"done ({:.1}s, {} tokens, {} sorry)",
#607	elapsed, usage.completion_tokens, sorry_count
#608	);
#609
#610	// Save raw and extracted code
#611	std::fs::write(
#612	attempts_dir.join(format!("completion_{}_raw.txt", i)),
#613	&content,
#614	)?;
#615	std::fs::write(attempts_dir.join(format!("completion_{}.lean", i)), &lean_code)?;
#616
#617	metadata.completions.push(CompletionMetadata {
#618	index: i,
#619	sorry_count,
#620	elapsed_seconds: elapsed,
#621	prompt_tokens: usage.prompt_tokens,
#622	completion_tokens: usage.completion_tokens,
#623	total_tokens: usage.total_tokens,
#624	finish_reason,
#625	build_status: BuildStatus::NotRun,
#626	build_log_path: None,
#627	});
#628
#629	if sorry_count < best_sorry_count {
#630	best_sorry_count = sorry_count;
#631	best_idx = i;
#632	}
#633	}
#634
#635	if validate {
#636	eprintln!("\nValidating completions with 'lake build Best'...");
#637	let mut ranked_candidates = metadata.completions.clone();
#638	ranked_candidates.sort_by(\|a, b\| {
#639	if a.sorry_count != b.sorry_count {
#640	a.sorry_count.cmp(&b.sorry_count)
#641	} else {
#642	a.index.cmp(&b.index)
#643	}
#644	});
#645
#646	let mut found_validated = false;
#647	for candidate in ranked_candidates {
#648	let candidate_lean =
#649	std::fs::read_to_string(attempts_dir.join(format!("completion_{}.lean", candidate.index)))?;
#650	std::fs::write(output_dir.join("Best.lean"), &candidate_lean)?;
#651
#652	eprint!(
#653	" Validate completion_{}.lean ({} sorry)... ",
#654	candidate.index, candidate.sorry_count
#655	);
#656	let validation = crate::validate::validate_completion(output_dir, candidate.index, validation_workspace).await?;
#657
#658	// Update metadata
#659	let meta = metadata
#660	.completions
#661	.iter_mut()
#662	.find(\|m\| m.index == candidate.index)
#663	.unwrap();
#664	meta.build_status = validation.status;
#665	meta.build_log_path = validation.log_path;
#666
#667	eprintln!("{:?}", validation.status);
#668
#669	if validation.status == BuildStatus::Success {
#670	best_idx = candidate.index;
#671	best_sorry_count = candidate.sorry_count;
#672	metadata.best_selection_reason = "validated_build".to_string();
#673	found_validated = true;
#674	break;
#675	}
#676	}
#677
#678	if !found_validated {
#679	metadata.best_selection_reason = "fewest_sorry_no_valid_build".to_string();
#680	}
#681	}
#682
#683	metadata.best_completion_index = best_idx;
#684	metadata.best_sorry_count = best_sorry_count;
#685
#686	// Save metadata
#687	std::fs::write(
#688	output_dir.join("metadata.json"),
#689	serde_json::to_string_pretty(&metadata)?,
#690	)?;
#691
#692	// Copy best completion to Best.lean
#693	let best_lean =
#694	std::fs::read_to_string(attempts_dir.join(format!("completion_{}.lean", best_idx)))?;
#695	std::fs::write(output_dir.join("Best.lean"), &best_lean)?;
#696
#697	eprintln!("\nResults saved to {}/", output_dir.display());
#698	eprintln!(
#699	"Best completion: Best.lean (from attempts/completion_{}.lean, {} sorry)",
#700	best_idx, best_sorry_count
#701	);
#702	eprintln!("Selection reason: {}", metadata.best_selection_reason);
#703	eprintln!("\nTo verify the proof:");
#704	eprintln!(" cd {}", output_dir.display());
#705	eprintln!(" lake build # Build and verify proofs");
#706
#707	// Print best completion to stdout
#708	println!("{}", best_lean);
#709
#710	Ok(())
#711	}
#712
#713	const SORRY_FILL_SYSTEM_PROMPT: &str = r#"You are Leanstral, an expert Lean 4 proof engineer.
#714
#715	TASK: Replace the `sorry` placeholder(s) in the provided Lean 4 file with valid proof tactics.
#716
#717	Rules:
#718	1. Return the COMPLETE file with sorry markers replaced by working proofs
#719	2. Do NOT change any definitions, structures, or theorem signatures
#720	3. Do NOT add or remove imports
#721	4. Do NOT add new theorems or definitions
#722	5. Only modify the proof bodies where `sorry` appears
#723	6. Use standard Lean 4.15 / Mathlib 4.15 tactics
#724	7. Prefer: unfold, split_ifs, cases, omega, rfl, exact, constructor, contradiction
#725	8. When a named predicate appears in both a hypothesis and the goal, unfold it in BOTH: `unfold pred at h ⊢`
#726	9. Output the complete file in a single ```lean4 code block
#727	10. If you cannot fill a sorry, leave it as sorry"#;
#728
#729	/// Parse sorry locations from a Lean file
#730	fn find_sorry_locations(code: &str) -> Vec<(usize, String)> {
#731	let mut locations = Vec::new();
#732	let sorry_re = regex::Regex::new(r"\bsorry\b").unwrap();
#733
#734	// Find enclosing theorem for each sorry
#735	let theorem_re = regex::Regex::new(
#736	r"(?m)^(theorem\|lemma)\s+([a-zA-Z_][a-zA-Z0-9_']*)"
#737	).unwrap();
#738
#739	for mat in sorry_re.find_iter(code) {
#740	let line_num = code[..mat.start()].matches('\n').count() + 1;
#741
#742	// Find the enclosing theorem
#743	let before = &code[..mat.start()];
#744	let enclosing = theorem_re
#745	.captures_iter(before)
#746	.last()
#747	.and_then(\|c\| c.get(2))
#748	.map(\|m\| m.as_str().to_string())
#749	.unwrap_or_else(\|\| "unknown".to_string());
#750
#751	locations.push((line_num, enclosing));
#752	}
#753
#754	locations
#755	}
#756
#757	/// Fill sorry markers in a Lean file using Leanstral
#758	pub async fn fill_sorry(
#759	file_path: &Path,
#760	output_path: Option<&Path>,
#761	passes: usize,
#762	temperature: f64,
#763	max_tokens: usize,
#764	validate: bool,
#765	) -> Result<()> {
#766	let api_key = std::env::var("MISTRAL_API_KEY")
#767	.context("MISTRAL_API_KEY environment variable not set.\nGet a free key at https://console.mistral.ai")?;
#768
#769	let code = std::fs::read_to_string(file_path)
#770	.context(format!("Cannot read file: {}", file_path.display()))?;
#771
#772	let sorry_locations = find_sorry_locations(&code);
#773	if sorry_locations.is_empty() {
#774	eprintln!("No sorry markers found in {}", file_path.display());
#775	return Ok(());
#776	}
#777
#778	eprintln!(
#779	"Found {} sorry marker(s) in {}:",
#780	sorry_locations.len(),
#781	file_path.display()
#782	);
#783	for (line, theorem) in &sorry_locations {
#784	eprintln!(" line {}: in {}", line, theorem);
#785	}
#786
#787	let prompt = format!(
#788	"Fill all `sorry` placeholders in this Lean 4 file with valid proofs.\n\n```lean4\n{}\n```",
#789	code
#790	);
#791
#792	let client = Client::new();
#793	eprintln!(
#794	"\nCalling Leanstral model ({}) with pass@{}...",
#795	MODEL, passes
#796	);
#797
#798	let mut best_code: Option<String> = None;
#799	let mut best_sorry_count = sorry_locations.len();
#800
#801	for i in 0..passes {
#802	eprint!(" Pass {}/{}... ", i + 1, passes);
#803	let result = call_mistral_api_with_system(
#804	&client,
#805	&prompt,
#806	&api_key,
#807	temperature,
#808	max_tokens,
#809	SORRY_FILL_SYSTEM_PROMPT,
#810	)
#811	.await;
#812
#813	match result {
#814	Ok((content, elapsed, usage, _)) => {
#815	let filled = extract_lean_code(&content);
#816	let sorry_count = count_sorry(&filled);
#817	eprintln!("done ({:.1}s, {} tokens, {} sorry remaining)", elapsed, usage.completion_tokens, sorry_count);
#818
#819	if sorry_count < best_sorry_count {
#820	best_sorry_count = sorry_count;
#821	best_code = Some(filled);
#822	}
#823	if sorry_count == 0 {
#824	break;
#825	}
#826	}
#827	Err(e) => {
#828	eprintln!("error: {}", e);
#829	}
#830	}
#831	}
#832
#833	let output = output_path.unwrap_or(file_path);
#834
#835	if let Some(filled_code) = best_code {
#836	std::fs::write(output, &filled_code)?;
#837	eprintln!(
#838	"\nWrote filled proof to {} ({} sorry remaining)",
#839	output.display(),
#840	best_sorry_count
#841	);
#842
#843	if validate {
#844	let project_dir = output.parent().unwrap_or(Path::new("."));
#845	eprintln!("Validating with lake build...");
#846	let status = std::process::Command::new("lake")
#847	.arg("build")
#848	.current_dir(project_dir)
#849	.status();
#850	match status {
#851	Ok(s) if s.success() => eprintln!("Validation: Success"),
#852	Ok(_) => eprintln!("Validation: Failed (see errors above)"),
#853	Err(e) => eprintln!("Validation: Could not run lake: {}", e),
#854	}
#855	}
#856	} else {
#857	eprintln!("\nNo improvement found. Original file unchanged.");
#858	}
#859
#860	Ok(())
#861	}
#862
#863
#864	#[cfg(test)]
#865	mod tests {
#866	use super::*;
#867
#868	#[test]
#869	fn test_extract_lean_code_multiline() {
#870	let input = r#"Here is some Lean code:
#871
#872	```lean4
#873	theorem add_comm (a b : Nat) : a + b = b + a := by
#874	induction a with
#875	\| zero => simp
#876	\| succ a ih => simp [ih]
#877	```
#878
#879	That's the proof."#;
#880
#881	let result = extract_lean_code(input);
#882	assert!(result.contains("theorem add_comm"));
#883	assert!(result.contains("induction a with"));
#884	assert!(result.contains("\| zero => simp"));
#885	assert!(result.contains("\| succ a ih => simp [ih]"));
#886	}
#887
#888	#[test]
#889	fn test_extract_lean_code_single_line() {
#890	let input = r#"```lean
#891	def id (x : Nat) := x
#892	```"#;
#893
#894	let result = extract_lean_code(input);
#895	assert_eq!(result.trim(), "def id (x : Nat) := x");
#896	}
#897
#898	#[test]
#899	fn test_extract_lean_code_no_blocks() {
#900	let input = "Just some plain text without code blocks";
#901	let result = extract_lean_code(input);
#902	assert_eq!(result, input);
#903	}
#904
#905	#[test]
#906	fn test_extract_lean_code_multiple_blocks() {
#907	let input = r#"First block:
#908	```lean4
#909	def foo := 1
#910	```
#911
#912	Second block:
#913	```lean
#914	def bar := 2
#915	```"#;
#916
#917	let result = extract_lean_code(input);
#918	assert!(result.contains("def foo := 1"));
#919	assert!(result.contains("def bar := 2"));
#920	}
#921
#922	#[test]
#923	fn test_deduplicate_theorem_stubs() {
#924	let input = r#"Here are the theorems:
#925	```lean4
#926	theorem add_comm (a b : Nat) : a + b = b + a := by
#927	```
#928
#929	And here are the proofs:
#930	```lean4
#931	theorem add_comm (a b : Nat) : a + b = b + a := by
#932	omega
#933	```"#;
#934
#935	let result = extract_lean_code(input);
#936	// Should only contain one instance of add_comm
#937	let count = result.matches("theorem add_comm").count();
#938	assert_eq!(count, 1, "Should have exactly one add_comm theorem");
#939	// Should have the version with the proof body
#940	assert!(result.contains("omega"), "Should contain the proof body");
#941	}
#942
#943	#[test]
#944	fn test_deduplicate_multiple_stubs_and_impls() {
#945	let input = r#"Types:
#946	```lean4
#947	structure Point where
#948	x : Nat
#949	y : Nat
#950	```
#951
#952	Theorem stubs:
#953	```lean4
#954	theorem point_eq (p : Point) : p.x + p.y = p.y + p.x := by
#955	```
#956
#957	Proofs:
#958	```lean4
#959	theorem point_eq (p : Point) : p.x + p.y = p.y + p.x := by
#960	omega
#961	```"#;
#962
#963	let result = extract_lean_code(input);
#964	let theorem_count = result.matches("theorem point_eq").count();
#965	assert_eq!(theorem_count, 1, "Should have exactly one point_eq theorem");
#966	let struct_count = result.matches("structure Point").count();
#967	assert_eq!(struct_count, 1, "Should have exactly one Point structure");
#968	assert!(result.contains("omega"));
#969	}
#970	}
#971

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public