repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...2fa351d6docs: add automaton and perps launch sources16d ago| #1 | use anyhow::{Context, Result}; |
| #2 | use reqwest::Client; |
| #3 | use serde::{Deserialize, Serialize}; |
| #4 | use std::path::{Path, PathBuf}; |
| #5 | use std::time::{Duration, Instant}; |
| #6 | use tokio::time::sleep; |
| #7 | |
| #8 | const API_URL: &str = "https://api.mistral.ai/v1/chat/completions"; |
| #9 | const MODEL: &str = "labs-leanstral-2603"; |
| #10 | const TIMEOUT_SECS: u64 = 180; |
| #11 | const MAX_RETRIES: u32 = 3; |
| #12 | const BACKOFF_BASE_MS: u64 = 2000; |
| #13 | |
| #14 | const SYSTEM_PROMPT: &str = r#"You are Leanstral, an expert Lean 4 proof engineer. |
| #15 | |
| #16 | GOAL: Produce a single Lean 4 module that COMPILES under Lean 4.15 + Mathlib 4.15 with <5% sorry usage. |
| #17 | |
| #18 | CRITICAL SYNTAX RULE - Parameter Naming Convention: |
| #19 | ALL function parameters MUST be prefixed with `p_` to avoid conflicts with Lean reserved keywords. |
| #20 | |
| #21 | Examples: |
| #22 | WRONG: `def transfer (from to amount : Nat) := ...` |
| #23 | RIGHT: `def transfer (p_from p_to p_amount : Nat) := ...` |
| #24 | |
| #25 | WRONG: `def withdraw (s amt : Nat) := ...` |
| #26 | RIGHT: `def withdraw (p_s p_amt : Nat) := ...` |
| #27 | |
| #28 | WRONG: `def initialize := ...` |
| #29 | RIGHT: `def initEscrow := ...` (for function names, use descriptive names like initEscrow, createEscrow, setupEscrow) |
| #30 | |
| #31 | Always use this pattern: |
| #32 | - Function parameters: `p_from`, `p_to`, `p_state`, `p_amount`, `p_key` |
| #33 | - Function names: `initEscrow`, `cancelEscrow`, `executeExchange` (avoid `initialize`) |
| #34 | - Structure fields: `isClosed`, `isActive`, `balance`, `authority` (no prefix needed for fields) |
| #35 | - Local variables in proofs: can use any name, but `p_` prefix is safe if unsure |
| #36 | |
| #37 | Hard constraints: |
| #38 | 1. Output exactly one Lean module in a SINGLE ```lean4 code block |
| #39 | 2. Do not output prose, explanations, headers, or multiple code blocks |
| #40 | 3. Each theorem/function/type appears exactly once |
| #41 | 4. Every theorem must include a complete proof body |
| #42 | 5. Aim for 0 `sorry`; absolute maximum is <5% of theorems using `sorry` |
| #43 | 6. Define every helper function, structure, theorem, and constant before first use |
| #44 | 7. Use only identifiers and constants that are defined in the file or are standard in Lean 4.15 / Mathlib 4.15 |
| #45 | 8. Do NOT invent library constants or theorem names such as `Nat.le_max`, `Nat.add_le_max`, `u64_max`, etc. If you need a bound, define it explicitly in the file |
| #46 | 9. Do NOT use `_` placeholders in theorem statements or definitions when Lean must infer a witness or value |
| #47 | 10. In particular, do NOT write propositions like `f x = some _` |
| #48 | 11. Instead, use an explicit existential: `∃ y, f x = some y` |
| #49 | 12. Do NOT use `.get!` in theorem statements |
| #50 | 13. Avoid `.get!` in proofs unless a prior hypothesis has already rewritten the expression to `some v` |
| #51 | 14. Prefer theorem statements of the form `f s = some s' -> ...` or `∃ s', f s = some s' ∧ ...` |
| #52 | 15. Prefer simple, executable definitions and simple theorem statements that are actually provable |
| #53 | 16. If a property is too strong for the chosen model, simplify the model or theorem statement so it compiles and proves cleanly |
| #54 | 17. Prefer `simp`, `rfl`, `constructor`, `aesop`, `omega`, `cases`, `rcases`, and explicit rewriting over brittle tactics |
| #55 | 18. Do not call tactics that do not match the goal shape; for example, do not use `split_ifs` unless an `if` expression is present in the goal or local hypotheses |
| #56 | 19. Avoid dependent pattern matching unless necessary |
| #57 | 20. Prefer total functions returning `Option` for state transitions when success/failure matters |
| #58 | |
| #59 | Modeling guidance: |
| #60 | - Prefer a small abstract model over a realistic but fragile one |
| #61 | - Use `Nat` for balances and account identifiers unless a stronger type is truly needed |
| #62 | - For arithmetic safety, if you need a `u64` bound, define it explicitly, for example: |
| #63 | `def U64_MAX : Nat := 2^64 - 1` |
| #64 | - State transitions should be simple and deterministic |
| #65 | - If proving full token conservation is difficult, define a simple total-balance helper and prove conservation by simplification |
| #66 | - If proving successful execution properties, make success explicit with hypotheses like `h : exchange s taker = some s'` |
| #67 | |
| #68 | CRITICAL - Using Support Libraries: |
| #69 | If the user prompt provides a "Support API" section listing pre-imported types, functions, or lemmas: |
| #70 | - You MUST use those definitions EXACTLY as provided |
| #71 | - Do NOT redefine ANY type, function, or lemma listed in the Support API |
| #72 | - Do NOT add fields to types defined in the Support API |
| #73 | - Do NOT assume types have fields beyond what is documented in the Support API |
| #74 | - Only define NEW helpers that are NOT in the Support API |
| #75 | - Example: If Support API says "Account has fields: key, authority, balance, writable" |
| #76 | then Account has ONLY those 4 fields. Do NOT access non-existent fields like "escrow_token_account" |
| #77 | - Example: If Support API defines "findByKey : List Account -> Pubkey -> Option Account" |
| #78 | then use it EXACTLY with those types. Do NOT pass an Account where a Pubkey is expected |
| #79 | - When the user says "open QEDGen.Solana", all types from that module are available |
| #80 | - Read the Support API documentation carefully and use types correctly |
| #81 | - If you need a custom type with different fields, define it with a DIFFERENT name |
| #82 | |
| #83 | Recommended structure: |
| #84 | - imports |
| #85 | - constants |
| #86 | - structures / inductives |
| #87 | - helper functions |
| #88 | - transition functions |
| #89 | - helper lemmas |
| #90 | - final theorems |
| #91 | |
| #92 | Pre-output checklist: |
| #93 | - No Lean keywords used as identifiers |
| #94 | - No `_` placeholders in theorem statements |
| #95 | - No `.get!` in theorem statements |
| #96 | - No invented constants or theorem names |
| #97 | - Every referenced name is defined or imported |
| #98 | - Every theorem is provable with the chosen model |
| #99 | - Output is exactly one `lean4` code block |
| #100 | |
| #101 | Example patterns you should imitate: |
| #102 | |
| #103 | Example 1: simple structure and definitional proof with p_ prefix |
| #104 | ```lean4 |
| #105 | import Mathlib |
| #106 | |
| #107 | structure State where |
| #108 | balance : Nat |
| #109 | |
| #110 | def credit (p_s : State) (p_amt : Nat) : State := |
| #111 | { p_s with balance := p_s.balance + p_amt } |
| #112 | |
| #113 | theorem credit_balance (p_s : State) (p_amt : Nat) : |
| #114 | (credit p_s p_amt).balance = p_s.balance + p_amt := by |
| #115 | simp [credit] |
| #116 | ``` |
| #117 | |
| #118 | Example 2: `Option` transition with explicit success witness and p_ prefix |
| #119 | ```lean4 |
| #120 | import Mathlib |
| #121 | |
| #122 | structure State where |
| #123 | balance : Nat |
| #124 | |
| #125 | def withdraw (p_s : State) (p_amt : Nat) : Option State := |
| #126 | if h : p_amt <= p_s.balance then |
| #127 | some { p_s with balance := p_s.balance - p_amt } |
| #128 | else |
| #129 | none |
| #130 | |
| #131 | theorem withdraw_success_balance (p_s p_s' : State) (p_amt : Nat) |
| #132 | (h : withdraw p_s p_amt = some p_s') : |
| #133 | p_s'.balance = p_s.balance - p_amt := by |
| #134 | simp [withdraw] at h |
| #135 | split_ifs at h with hle |
| #136 | · cases h |
| #137 | simp |
| #138 | · contradiction |
| #139 | ``` |
| #140 | |
| #141 | Example 3: success stated with an existential, not `some _`, with p_ prefix |
| #142 | ```lean4 |
| #143 | import Mathlib |
| #144 | |
| #145 | structure State where |
| #146 | openFlag : Bool |
| #147 | |
| #148 | def closeState (p_s : State) : Option State := |
| #149 | some { p_s with openFlag := false } |
| #150 | |
| #151 | theorem closeState_exists (p_s : State) : |
| #152 | ∃ s', closeState p_s = some s' := by |
| #153 | refine ⟨{ p_s with openFlag := false }, ?_⟩ |
| #154 | simp [closeState] |
| #155 | ``` |
| #156 | |
| #157 | Example 4: record update pattern inside a larger state with p_ prefix |
| #158 | ```lean4 |
| #159 | import Mathlib |
| #160 | |
| #161 | structure Escrow where |
| #162 | isClosed : Bool |
| #163 | |
| #164 | structure ProgramState where |
| #165 | escrow : Escrow |
| #166 | counter : Nat |
| #167 | |
| #168 | def markClosed (p_s : ProgramState) : ProgramState := |
| #169 | { p_s with escrow := { p_s.escrow with isClosed := true } } |
| #170 | |
| #171 | theorem markClosed_closed (p_s : ProgramState) : |
| #172 | (markClosed p_s).escrow.isClosed = true := by |
| #173 | simp [markClosed] |
| #174 | ``` |
| #175 | |
| #176 | Example 5: conjunction proof with `constructor` and p_ prefix |
| #177 | ```lean4 |
| #178 | import Mathlib |
| #179 | |
| #180 | theorem pairFacts (p_a p_b : Nat) : |
| #181 | p_a = p_a ∧ p_b = p_b := by |
| #182 | constructor |
| #183 | · rfl |
| #184 | · rfl |
| #185 | ``` |
| #186 | |
| #187 | Example 6: explicit arithmetic bound with a defined constant and p_ prefix |
| #188 | ```lean4 |
| #189 | import Mathlib |
| #190 | |
| #191 | def U64_MAX : Nat := 2^64 - 1 |
| #192 | |
| #193 | theorem bounded_add_safe (p_x p_y : Nat) |
| #194 | (hx : p_x <= U64_MAX) (hy : p_y <= U64_MAX) (hxy : p_x + p_y <= U64_MAX) : |
| #195 | p_x + p_y <= U64_MAX := by |
| #196 | omega |
| #197 | ``` |
| #198 | |
| #199 | Output requirements: |
| #200 | - Output plain Lean code only, inside a single ```lean4 code block |
| #201 | - Do NOT include any explanation before or after the code block |
| #202 | - Do NOT emit duplicate declarations |
| #203 | - Do NOT emit theorem stubs followed later by proofs |
| #204 | - If a theorem is too difficult, simplify the theorem statement before writing the proof |
| #205 | - Prefer compilable, modest theorems over ambitious but broken ones |
| #206 | |
| #207 | When in doubt, choose the simplest model and the simplest theorem statement that still captures the requested property and compiles cleanly."#; |
| #208 | |
| #209 | #[derive(Debug, Serialize)] |
| #210 | struct ChatMessage { |
| #211 | role: String, |
| #212 | content: String, |
| #213 | } |
| #214 | |
| #215 | #[derive(Debug, Serialize)] |
| #216 | struct ChatRequest { |
| #217 | model: String, |
| #218 | messages: Vec<ChatMessage>, |
| #219 | temperature: f64, |
| #220 | max_tokens: usize, |
| #221 | } |
| #222 | |
| #223 | #[derive(Debug, Deserialize)] |
| #224 | struct ChatChoice { |
| #225 | message: ChatMessageContent, |
| #226 | finish_reason: String, |
| #227 | } |
| #228 | |
| #229 | #[derive(Debug, Deserialize)] |
| #230 | struct ChatMessageContent { |
| #231 | content: String, |
| #232 | } |
| #233 | |
| #234 | #[derive(Debug, Deserialize)] |
| #235 | struct Usage { |
| #236 | prompt_tokens: usize, |
| #237 | completion_tokens: usize, |
| #238 | total_tokens: usize, |
| #239 | } |
| #240 | |
| #241 | #[derive(Debug, Deserialize)] |
| #242 | struct ChatResponse { |
| #243 | choices: Vec<ChatChoice>, |
| #244 | usage: Usage, |
| #245 | } |
| #246 | |
| #247 | #[derive(Debug, Clone, Serialize, Deserialize)] |
| #248 | pub struct CompletionMetadata { |
| #249 | pub index: usize, |
| #250 | pub sorry_count: usize, |
| #251 | pub elapsed_seconds: f64, |
| #252 | pub prompt_tokens: usize, |
| #253 | pub completion_tokens: usize, |
| #254 | pub total_tokens: usize, |
| #255 | pub finish_reason: String, |
| #256 | pub build_status: BuildStatus, |
| #257 | #[serde(skip_serializing_if = "Option::is_none")] |
| #258 | pub build_log_path: Option<PathBuf>, |
| #259 | } |
| #260 | |
| #261 | #[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] |
| #262 | #[serde(rename_all = "snake_case")] |
| #263 | pub enum BuildStatus { |
| #264 | NotRun, |
| #265 | Success, |
| #266 | Failed, |
| #267 | Skipped, |
| #268 | } |
| #269 | |
| #270 | #[derive(Debug, Serialize, Deserialize)] |
| #271 | pub struct QedgenMetadata { |
| #272 | pub model: String, |
| #273 | pub passes: usize, |
| #274 | pub temperature: f64, |
| #275 | pub max_tokens: usize, |
| #276 | pub validate: bool, |
| #277 | pub completions: Vec<CompletionMetadata>, |
| #278 | pub best_completion_index: usize, |
| #279 | pub best_sorry_count: usize, |
| #280 | pub best_selection_reason: String, |
| #281 | } |
| #282 | |
| #283 | async fn call_mistral_api_with_system( |
| #284 | client: &Client, |
| #285 | prompt: &str, |
| #286 | api_key: &str, |
| #287 | temperature: f64, |
| #288 | max_tokens: usize, |
| #289 | system_prompt: &str, |
| #290 | ) -> Result<(String, f64, Usage, String)> { |
| #291 | let request = ChatRequest { |
| #292 | model: MODEL.to_string(), |
| #293 | messages: vec![ |
| #294 | ChatMessage { |
| #295 | role: "system".to_string(), |
| #296 | content: system_prompt.to_string(), |
| #297 | }, |
| #298 | ChatMessage { |
| #299 | role: "user".to_string(), |
| #300 | content: prompt.to_string(), |
| #301 | }, |
| #302 | ], |
| #303 | temperature, |
| #304 | max_tokens, |
| #305 | }; |
| #306 | |
| #307 | for attempt in 0..MAX_RETRIES { |
| #308 | let start = Instant::now(); |
| #309 | let response = client |
| #310 | .post(API_URL) |
| #311 | .header("Content-Type", "application/json") |
| #312 | .header("Authorization", format!("Bearer {}", api_key)) |
| #313 | .json(&request) |
| #314 | .timeout(Duration::from_secs(TIMEOUT_SECS)) |
| #315 | .send() |
| #316 | .await; |
| #317 | |
| #318 | let elapsed = start.elapsed().as_secs_f64(); |
| #319 | |
| #320 | match response { |
| #321 | Ok(resp) => { |
| #322 | let status = resp.status(); |
| #323 | if status.is_success() { |
| #324 | let body: ChatResponse = resp.json().await?; |
| #325 | let content = body |
| #326 | .choices |
| #327 | .first() |
| #328 | .context("No choices in response")? |
| #329 | .message |
| #330 | .content |
| #331 | .clone(); |
| #332 | let finish_reason = body |
| #333 | .choices |
| #334 | .first() |
| #335 | .context("No choices in response")? |
| #336 | .finish_reason |
| #337 | .clone(); |
| #338 | return Ok((content, elapsed, body.usage, finish_reason)); |
| #339 | } else if status.as_u16() == 429 { |
| #340 | let wait = BACKOFF_BASE_MS * 2_u64.pow(attempt); |
| #341 | eprintln!( |
| #342 | " Rate limited (429). Retrying in {}s... (attempt {}/{})", |
| #343 | wait / 1000, |
| #344 | attempt + 1, |
| #345 | MAX_RETRIES |
| #346 | ); |
| #347 | sleep(Duration::from_millis(wait)).await; |
| #348 | continue; |
| #349 | } else if status.as_u16() == 401 { |
| #350 | anyhow::bail!("Invalid or missing MISTRAL_API_KEY. Get one at https://console.mistral.ai"); |
| #351 | } else if status.as_u16() == 403 { |
| #352 | let error_body = resp.text().await.unwrap_or_default(); |
| #353 | if error_body.contains("labs_not_enabled") { |
| #354 | anyhow::bail!("The Leanstral Labs model is not enabled for this Mistral organization.\nAsk an org admin to enable Labs models at https://admin.mistral.ai/plateforme/privacy and retry."); |
| #355 | } else { |
| #356 | anyhow::bail!("HTTP 403: {}", error_body); |
| #357 | } |
| #358 | } else { |
| #359 | let error_body = resp.text().await.unwrap_or_default(); |
| #360 | eprintln!("ERROR: HTTP {}: {}", status, error_body); |
| #361 | if attempt < MAX_RETRIES - 1 { |
| #362 | sleep(Duration::from_millis(BACKOFF_BASE_MS * 2_u64.pow(attempt))).await; |
| #363 | continue; |
| #364 | } |
| #365 | anyhow::bail!("HTTP {}: {}", status, error_body); |
| #366 | } |
| #367 | } |
| #368 | Err(e) => { |
| #369 | eprintln!("ERROR: {}", e); |
| #370 | if attempt < MAX_RETRIES - 1 { |
| #371 | sleep(Duration::from_millis(BACKOFF_BASE_MS * 2_u64.pow(attempt))).await; |
| #372 | continue; |
| #373 | } |
| #374 | return Err(e.into()); |
| #375 | } |
| #376 | } |
| #377 | } |
| #378 | |
| #379 | anyhow::bail!("All retries exhausted") |
| #380 | } |
| #381 | |
| #382 | async fn call_mistral_api( |
| #383 | client: &Client, |
| #384 | prompt: &str, |
| #385 | api_key: &str, |
| #386 | temperature: f64, |
| #387 | max_tokens: usize, |
| #388 | ) -> Result<(String, f64, Usage, String)> { |
| #389 | call_mistral_api_with_system(client, prompt, api_key, temperature, max_tokens, SYSTEM_PROMPT).await |
| #390 | } |
| #391 | |
| #392 | fn extract_lean_code(content: &str) -> String { |
| #393 | // Extract code from ```lean or ```lean4 blocks |
| #394 | // (?s) enables dotall mode so . matches newlines |
| #395 | let re = regex::Regex::new(r"(?s)```lean4?\s*\n(.*?)```").unwrap(); |
| #396 | let mut extracted = Vec::new(); |
| #397 | |
| #398 | for cap in re.captures_iter(content) { |
| #399 | if let Some(code) = cap.get(1) { |
| #400 | extracted.push(code.as_str()); |
| #401 | } |
| #402 | } |
| #403 | |
| #404 | if !extracted.is_empty() { |
| #405 | // If we have multiple blocks, try to deduplicate them |
| #406 | if extracted.len() > 1 { |
| #407 | deduplicate_lean_blocks(&extracted) |
| #408 | } else { |
| #409 | extracted[0].to_string() |
| #410 | } |
| #411 | } else { |
| #412 | content.to_string() |
| #413 | } |
| #414 | } |
| #415 | |
| #416 | fn deduplicate_lean_blocks(blocks: &[&str]) -> String { |
| #417 | use std::collections::{HashMap, HashSet}; |
| #418 | |
| #419 | // Parse each block to find declarations (theorem, def, structure, inductive, etc.) |
| #420 | let decl_pattern = regex::Regex::new( |
| #421 | r"(?m)^(theorem|def|structure|inductive|class|instance|axiom|lemma)\s+([a-zA-Z_][a-zA-Z0-9_']*)" |
| #422 | ).unwrap(); |
| #423 | |
| #424 | // Map declaration names to their best implementation |
| #425 | let mut declarations: HashMap<String, (usize, &str, bool)> = HashMap::new(); |
| #426 | let mut imports = Vec::new(); |
| #427 | let mut seen_imports = HashSet::new(); |
| #428 | |
| #429 | for (block_idx, block) in blocks.iter().enumerate() { |
| #430 | // Collect imports from all blocks |
| #431 | for line in block.lines() { |
| #432 | if line.trim().starts_with("import ") { |
| #433 | let import_stmt = line.trim(); |
| #434 | if !seen_imports.contains(import_stmt) { |
| #435 | imports.push(import_stmt); |
| #436 | seen_imports.insert(import_stmt); |
| #437 | } |
| #438 | } |
| #439 | } |
| #440 | |
| #441 | // Find all declarations in this block |
| #442 | for cap in decl_pattern.captures_iter(block) { |
| #443 | if let (Some(_kind), Some(name_match)) = (cap.get(1), cap.get(2)) { |
| #444 | let name = name_match.as_str().to_string(); |
| #445 | let decl_start = cap.get(0).unwrap().start(); |
| #446 | |
| #447 | // Find the end of this declaration (next declaration or end of block) |
| #448 | let next_decl = decl_pattern.find_at(block, decl_start + 1); |
| #449 | let decl_end = next_decl.map(|m| m.start()).unwrap_or(block.len()); |
| #450 | let decl_text = &block[decl_start..decl_end]; |
| #451 | |
| #452 | // Determine if this has a real implementation |
| #453 | // A stub typically has `:= by` followed by nothing or just whitespace |
| #454 | let has_implementation = !is_stub(decl_text); |
| #455 | |
| #456 | // Keep the declaration with implementation, or the latest one if both are stubs |
| #457 | if let Some((existing_idx, _existing_text, existing_has_impl)) = declarations.get(&name) { |
| #458 | // Prefer the one with implementation |
| #459 | if has_implementation && !existing_has_impl { |
| #460 | declarations.insert(name, (block_idx, decl_text, has_implementation)); |
| #461 | } else if !has_implementation && *existing_has_impl { |
| #462 | // Keep existing |
| #463 | } else { |
| #464 | // Both have impl or both are stubs, keep the later one |
| #465 | if block_idx > *existing_idx { |
| #466 | declarations.insert(name, (block_idx, decl_text, has_implementation)); |
| #467 | } |
| #468 | } |
| #469 | } else { |
| #470 | declarations.insert(name, (block_idx, decl_text, has_implementation)); |
| #471 | } |
| #472 | } |
| #473 | } |
| #474 | } |
| #475 | |
| #476 | // If deduplication didn't help much, just join blocks |
| #477 | if declarations.is_empty() { |
| #478 | return blocks.join("\n\n"); |
| #479 | } |
| #480 | |
| #481 | // Reconstruct the code with deduplicated declarations |
| #482 | let mut result = String::new(); |
| #483 | |
| #484 | // Add imports first |
| #485 | if !imports.is_empty() { |
| #486 | result.push_str(&imports.join("\n")); |
| #487 | result.push_str("\n\n"); |
| #488 | } |
| #489 | |
| #490 | // Add all declarations in a reasonable order (by block index, then position) |
| #491 | let mut sorted_decls: Vec<_> = declarations.values().collect(); |
| #492 | sorted_decls.sort_by_key(|(block_idx, _, _)| *block_idx); |
| #493 | |
| #494 | for (_, decl_text, _) in sorted_decls { |
| #495 | result.push_str(decl_text); |
| #496 | result.push_str("\n\n"); |
| #497 | } |
| #498 | |
| #499 | result.trim_end().to_string() |
| #500 | } |
| #501 | |
| #502 | fn is_stub(decl_text: &str) -> bool { |
| #503 | // Check if this is a stub declaration (has := by but no proof body) |
| #504 | if let Some(by_pos) = decl_text.find(":= by") { |
| #505 | let after_by = &decl_text[by_pos + 5..].trim(); |
| #506 | // If there's nothing after `:= by` or just whitespace/comments, it's a stub |
| #507 | let meaningful_content = after_by.lines() |
| #508 | .map(|l| l.trim()) |
| #509 | .filter(|l| !l.is_empty() && !l.starts_with("--")) |
| #510 | .collect::<Vec<_>>(); |
| #511 | meaningful_content.is_empty() |
| #512 | } else { |
| #513 | false |
| #514 | } |
| #515 | } |
| #516 | |
| #517 | fn normalize_lean_code(code: &str) -> String { |
| #518 | let lines: Vec<&str> = code.lines().collect(); |
| #519 | let mut normalized_imports = Vec::new(); |
| #520 | let mut body_lines = Vec::new(); |
| #521 | let mut saw_mathlib_import = false; |
| #522 | |
| #523 | let import_re = regex::Regex::new(r"^import\s+Mathlib(\..+)?\s*$").unwrap(); |
| #524 | let import_general_re = regex::Regex::new(r"^import\s+").unwrap(); |
| #525 | |
| #526 | for line in lines { |
| #527 | if import_re.is_match(line) { |
| #528 | saw_mathlib_import = true; |
| #529 | continue; |
| #530 | } |
| #531 | if import_general_re.is_match(line) { |
| #532 | normalized_imports.push(line); |
| #533 | continue; |
| #534 | } |
| #535 | body_lines.push(line); |
| #536 | } |
| #537 | |
| #538 | let mut import_block = Vec::new(); |
| #539 | // Always add Mathlib.Tactic import for tactics like split_ifs |
| #540 | if !saw_mathlib_import { |
| #541 | import_block.push("import Mathlib.Tactic"); |
| #542 | } else { |
| #543 | import_block.push("import Mathlib"); |
| #544 | } |
| #545 | import_block.extend(normalized_imports); |
| #546 | |
| #547 | let trimmed_body = body_lines.join("\n").trim_start().to_string(); |
| #548 | format!("{}\n\n{}\n", import_block.join("\n"), trimmed_body).trim_end().to_string() + "\n" |
| #549 | } |
| #550 | |
| #551 | fn count_sorry(code: &str) -> usize { |
| #552 | let re = regex::Regex::new(r"\bsorry\b").unwrap(); |
| #553 | re.find_iter(code).count() |
| #554 | } |
| #555 | |
| #556 | pub async fn generate_proofs( |
| #557 | prompt: &str, |
| #558 | output_dir: &Path, |
| #559 | passes: usize, |
| #560 | temperature: f64, |
| #561 | max_tokens: usize, |
| #562 | validate: bool, |
| #563 | validation_workspace: Option<&Path>, |
| #564 | ) -> Result<()> { |
| #565 | let api_key = std::env::var("MISTRAL_API_KEY") |
| #566 | .context("MISTRAL_API_KEY environment variable not set.\nGet a free key at https://console.mistral.ai\nThen run: export MISTRAL_API_KEY=your_key_here")?; |
| #567 | |
| #568 | // Create output directories |
| #569 | std::fs::create_dir_all(output_dir)?; |
| #570 | let attempts_dir = output_dir.join("attempts"); |
| #571 | std::fs::create_dir_all(&attempts_dir)?; |
| #572 | |
| #573 | // Set up Lean project files |
| #574 | crate::project::setup_lean_project(output_dir)?; |
| #575 | |
| #576 | // Save the prompt |
| #577 | std::fs::write(output_dir.join("prompt.txt"), prompt)?; |
| #578 | |
| #579 | eprintln!("Calling Leanstral model ({}) with pass@{}...", MODEL, passes); |
| #580 | |
| #581 | let client = Client::new(); |
| #582 | let mut metadata = QedgenMetadata { |
| #583 | model: MODEL.to_string(), |
| #584 | passes, |
| #585 | temperature, |
| #586 | max_tokens, |
| #587 | validate, |
| #588 | completions: Vec::new(), |
| #589 | best_completion_index: 0, |
| #590 | best_sorry_count: usize::MAX, |
| #591 | best_selection_reason: "fewest_sorry".to_string(), |
| #592 | }; |
| #593 | |
| #594 | let mut best_idx = 0; |
| #595 | let mut best_sorry_count = usize::MAX; |
| #596 | |
| #597 | for i in 0..passes { |
| #598 | eprint!(" Pass {}/{}... ", i + 1, passes); |
| #599 | let (content, elapsed, usage, finish_reason) = |
| #600 | call_mistral_api(&client, prompt, &api_key, temperature, max_tokens).await?; |
| #601 | |
| #602 | let lean_code = normalize_lean_code(&extract_lean_code(&content)); |
| #603 | let sorry_count = count_sorry(&lean_code); |
| #604 | |
| #605 | eprintln!( |
| #606 | "done ({:.1}s, {} tokens, {} sorry)", |
| #607 | elapsed, usage.completion_tokens, sorry_count |
| #608 | ); |
| #609 | |
| #610 | // Save raw and extracted code |
| #611 | std::fs::write( |
| #612 | attempts_dir.join(format!("completion_{}_raw.txt", i)), |
| #613 | &content, |
| #614 | )?; |
| #615 | std::fs::write(attempts_dir.join(format!("completion_{}.lean", i)), &lean_code)?; |
| #616 | |
| #617 | metadata.completions.push(CompletionMetadata { |
| #618 | index: i, |
| #619 | sorry_count, |
| #620 | elapsed_seconds: elapsed, |
| #621 | prompt_tokens: usage.prompt_tokens, |
| #622 | completion_tokens: usage.completion_tokens, |
| #623 | total_tokens: usage.total_tokens, |
| #624 | finish_reason, |
| #625 | build_status: BuildStatus::NotRun, |
| #626 | build_log_path: None, |
| #627 | }); |
| #628 | |
| #629 | if sorry_count < best_sorry_count { |
| #630 | best_sorry_count = sorry_count; |
| #631 | best_idx = i; |
| #632 | } |
| #633 | } |
| #634 | |
| #635 | if validate { |
| #636 | eprintln!("\nValidating completions with 'lake build Best'..."); |
| #637 | let mut ranked_candidates = metadata.completions.clone(); |
| #638 | ranked_candidates.sort_by(|a, b| { |
| #639 | if a.sorry_count != b.sorry_count { |
| #640 | a.sorry_count.cmp(&b.sorry_count) |
| #641 | } else { |
| #642 | a.index.cmp(&b.index) |
| #643 | } |
| #644 | }); |
| #645 | |
| #646 | let mut found_validated = false; |
| #647 | for candidate in ranked_candidates { |
| #648 | let candidate_lean = |
| #649 | std::fs::read_to_string(attempts_dir.join(format!("completion_{}.lean", candidate.index)))?; |
| #650 | std::fs::write(output_dir.join("Best.lean"), &candidate_lean)?; |
| #651 | |
| #652 | eprint!( |
| #653 | " Validate completion_{}.lean ({} sorry)... ", |
| #654 | candidate.index, candidate.sorry_count |
| #655 | ); |
| #656 | let validation = crate::validate::validate_completion(output_dir, candidate.index, validation_workspace).await?; |
| #657 | |
| #658 | // Update metadata |
| #659 | let meta = metadata |
| #660 | .completions |
| #661 | .iter_mut() |
| #662 | .find(|m| m.index == candidate.index) |
| #663 | .unwrap(); |
| #664 | meta.build_status = validation.status; |
| #665 | meta.build_log_path = validation.log_path; |
| #666 | |
| #667 | eprintln!("{:?}", validation.status); |
| #668 | |
| #669 | if validation.status == BuildStatus::Success { |
| #670 | best_idx = candidate.index; |
| #671 | best_sorry_count = candidate.sorry_count; |
| #672 | metadata.best_selection_reason = "validated_build".to_string(); |
| #673 | found_validated = true; |
| #674 | break; |
| #675 | } |
| #676 | } |
| #677 | |
| #678 | if !found_validated { |
| #679 | metadata.best_selection_reason = "fewest_sorry_no_valid_build".to_string(); |
| #680 | } |
| #681 | } |
| #682 | |
| #683 | metadata.best_completion_index = best_idx; |
| #684 | metadata.best_sorry_count = best_sorry_count; |
| #685 | |
| #686 | // Save metadata |
| #687 | std::fs::write( |
| #688 | output_dir.join("metadata.json"), |
| #689 | serde_json::to_string_pretty(&metadata)?, |
| #690 | )?; |
| #691 | |
| #692 | // Copy best completion to Best.lean |
| #693 | let best_lean = |
| #694 | std::fs::read_to_string(attempts_dir.join(format!("completion_{}.lean", best_idx)))?; |
| #695 | std::fs::write(output_dir.join("Best.lean"), &best_lean)?; |
| #696 | |
| #697 | eprintln!("\nResults saved to {}/", output_dir.display()); |
| #698 | eprintln!( |
| #699 | "Best completion: Best.lean (from attempts/completion_{}.lean, {} sorry)", |
| #700 | best_idx, best_sorry_count |
| #701 | ); |
| #702 | eprintln!("Selection reason: {}", metadata.best_selection_reason); |
| #703 | eprintln!("\nTo verify the proof:"); |
| #704 | eprintln!(" cd {}", output_dir.display()); |
| #705 | eprintln!(" lake build # Build and verify proofs"); |
| #706 | |
| #707 | // Print best completion to stdout |
| #708 | println!("{}", best_lean); |
| #709 | |
| #710 | Ok(()) |
| #711 | } |
| #712 | |
| #713 | const SORRY_FILL_SYSTEM_PROMPT: &str = r#"You are Leanstral, an expert Lean 4 proof engineer. |
| #714 | |
| #715 | TASK: Replace the `sorry` placeholder(s) in the provided Lean 4 file with valid proof tactics. |
| #716 | |
| #717 | Rules: |
| #718 | 1. Return the COMPLETE file with sorry markers replaced by working proofs |
| #719 | 2. Do NOT change any definitions, structures, or theorem signatures |
| #720 | 3. Do NOT add or remove imports |
| #721 | 4. Do NOT add new theorems or definitions |
| #722 | 5. Only modify the proof bodies where `sorry` appears |
| #723 | 6. Use standard Lean 4.15 / Mathlib 4.15 tactics |
| #724 | 7. Prefer: unfold, split_ifs, cases, omega, rfl, exact, constructor, contradiction |
| #725 | 8. When a named predicate appears in both a hypothesis and the goal, unfold it in BOTH: `unfold pred at h ⊢` |
| #726 | 9. Output the complete file in a single ```lean4 code block |
| #727 | 10. If you cannot fill a sorry, leave it as sorry"#; |
| #728 | |
| #729 | /// Parse sorry locations from a Lean file |
| #730 | fn find_sorry_locations(code: &str) -> Vec<(usize, String)> { |
| #731 | let mut locations = Vec::new(); |
| #732 | let sorry_re = regex::Regex::new(r"\bsorry\b").unwrap(); |
| #733 | |
| #734 | // Find enclosing theorem for each sorry |
| #735 | let theorem_re = regex::Regex::new( |
| #736 | r"(?m)^(theorem|lemma)\s+([a-zA-Z_][a-zA-Z0-9_']*)" |
| #737 | ).unwrap(); |
| #738 | |
| #739 | for mat in sorry_re.find_iter(code) { |
| #740 | let line_num = code[..mat.start()].matches('\n').count() + 1; |
| #741 | |
| #742 | // Find the enclosing theorem |
| #743 | let before = &code[..mat.start()]; |
| #744 | let enclosing = theorem_re |
| #745 | .captures_iter(before) |
| #746 | .last() |
| #747 | .and_then(|c| c.get(2)) |
| #748 | .map(|m| m.as_str().to_string()) |
| #749 | .unwrap_or_else(|| "unknown".to_string()); |
| #750 | |
| #751 | locations.push((line_num, enclosing)); |
| #752 | } |
| #753 | |
| #754 | locations |
| #755 | } |
| #756 | |
| #757 | /// Fill sorry markers in a Lean file using Leanstral |
| #758 | pub async fn fill_sorry( |
| #759 | file_path: &Path, |
| #760 | output_path: Option<&Path>, |
| #761 | passes: usize, |
| #762 | temperature: f64, |
| #763 | max_tokens: usize, |
| #764 | validate: bool, |
| #765 | ) -> Result<()> { |
| #766 | let api_key = std::env::var("MISTRAL_API_KEY") |
| #767 | .context("MISTRAL_API_KEY environment variable not set.\nGet a free key at https://console.mistral.ai")?; |
| #768 | |
| #769 | let code = std::fs::read_to_string(file_path) |
| #770 | .context(format!("Cannot read file: {}", file_path.display()))?; |
| #771 | |
| #772 | let sorry_locations = find_sorry_locations(&code); |
| #773 | if sorry_locations.is_empty() { |
| #774 | eprintln!("No sorry markers found in {}", file_path.display()); |
| #775 | return Ok(()); |
| #776 | } |
| #777 | |
| #778 | eprintln!( |
| #779 | "Found {} sorry marker(s) in {}:", |
| #780 | sorry_locations.len(), |
| #781 | file_path.display() |
| #782 | ); |
| #783 | for (line, theorem) in &sorry_locations { |
| #784 | eprintln!(" line {}: in {}", line, theorem); |
| #785 | } |
| #786 | |
| #787 | let prompt = format!( |
| #788 | "Fill all `sorry` placeholders in this Lean 4 file with valid proofs.\n\n```lean4\n{}\n```", |
| #789 | code |
| #790 | ); |
| #791 | |
| #792 | let client = Client::new(); |
| #793 | eprintln!( |
| #794 | "\nCalling Leanstral model ({}) with pass@{}...", |
| #795 | MODEL, passes |
| #796 | ); |
| #797 | |
| #798 | let mut best_code: Option<String> = None; |
| #799 | let mut best_sorry_count = sorry_locations.len(); |
| #800 | |
| #801 | for i in 0..passes { |
| #802 | eprint!(" Pass {}/{}... ", i + 1, passes); |
| #803 | let result = call_mistral_api_with_system( |
| #804 | &client, |
| #805 | &prompt, |
| #806 | &api_key, |
| #807 | temperature, |
| #808 | max_tokens, |
| #809 | SORRY_FILL_SYSTEM_PROMPT, |
| #810 | ) |
| #811 | .await; |
| #812 | |
| #813 | match result { |
| #814 | Ok((content, elapsed, usage, _)) => { |
| #815 | let filled = extract_lean_code(&content); |
| #816 | let sorry_count = count_sorry(&filled); |
| #817 | eprintln!("done ({:.1}s, {} tokens, {} sorry remaining)", elapsed, usage.completion_tokens, sorry_count); |
| #818 | |
| #819 | if sorry_count < best_sorry_count { |
| #820 | best_sorry_count = sorry_count; |
| #821 | best_code = Some(filled); |
| #822 | } |
| #823 | if sorry_count == 0 { |
| #824 | break; |
| #825 | } |
| #826 | } |
| #827 | Err(e) => { |
| #828 | eprintln!("error: {}", e); |
| #829 | } |
| #830 | } |
| #831 | } |
| #832 | |
| #833 | let output = output_path.unwrap_or(file_path); |
| #834 | |
| #835 | if let Some(filled_code) = best_code { |
| #836 | std::fs::write(output, &filled_code)?; |
| #837 | eprintln!( |
| #838 | "\nWrote filled proof to {} ({} sorry remaining)", |
| #839 | output.display(), |
| #840 | best_sorry_count |
| #841 | ); |
| #842 | |
| #843 | if validate { |
| #844 | let project_dir = output.parent().unwrap_or(Path::new(".")); |
| #845 | eprintln!("Validating with lake build..."); |
| #846 | let status = std::process::Command::new("lake") |
| #847 | .arg("build") |
| #848 | .current_dir(project_dir) |
| #849 | .status(); |
| #850 | match status { |
| #851 | Ok(s) if s.success() => eprintln!("Validation: Success"), |
| #852 | Ok(_) => eprintln!("Validation: Failed (see errors above)"), |
| #853 | Err(e) => eprintln!("Validation: Could not run lake: {}", e), |
| #854 | } |
| #855 | } |
| #856 | } else { |
| #857 | eprintln!("\nNo improvement found. Original file unchanged."); |
| #858 | } |
| #859 | |
| #860 | Ok(()) |
| #861 | } |
| #862 | |
| #863 | |
| #864 | #[cfg(test)] |
| #865 | mod tests { |
| #866 | use super::*; |
| #867 | |
| #868 | #[test] |
| #869 | fn test_extract_lean_code_multiline() { |
| #870 | let input = r#"Here is some Lean code: |
| #871 | |
| #872 | ```lean4 |
| #873 | theorem add_comm (a b : Nat) : a + b = b + a := by |
| #874 | induction a with |
| #875 | | zero => simp |
| #876 | | succ a ih => simp [ih] |
| #877 | ``` |
| #878 | |
| #879 | That's the proof."#; |
| #880 | |
| #881 | let result = extract_lean_code(input); |
| #882 | assert!(result.contains("theorem add_comm")); |
| #883 | assert!(result.contains("induction a with")); |
| #884 | assert!(result.contains("| zero => simp")); |
| #885 | assert!(result.contains("| succ a ih => simp [ih]")); |
| #886 | } |
| #887 | |
| #888 | #[test] |
| #889 | fn test_extract_lean_code_single_line() { |
| #890 | let input = r#"```lean |
| #891 | def id (x : Nat) := x |
| #892 | ```"#; |
| #893 | |
| #894 | let result = extract_lean_code(input); |
| #895 | assert_eq!(result.trim(), "def id (x : Nat) := x"); |
| #896 | } |
| #897 | |
| #898 | #[test] |
| #899 | fn test_extract_lean_code_no_blocks() { |
| #900 | let input = "Just some plain text without code blocks"; |
| #901 | let result = extract_lean_code(input); |
| #902 | assert_eq!(result, input); |
| #903 | } |
| #904 | |
| #905 | #[test] |
| #906 | fn test_extract_lean_code_multiple_blocks() { |
| #907 | let input = r#"First block: |
| #908 | ```lean4 |
| #909 | def foo := 1 |
| #910 | ``` |
| #911 | |
| #912 | Second block: |
| #913 | ```lean |
| #914 | def bar := 2 |
| #915 | ```"#; |
| #916 | |
| #917 | let result = extract_lean_code(input); |
| #918 | assert!(result.contains("def foo := 1")); |
| #919 | assert!(result.contains("def bar := 2")); |
| #920 | } |
| #921 | |
| #922 | #[test] |
| #923 | fn test_deduplicate_theorem_stubs() { |
| #924 | let input = r#"Here are the theorems: |
| #925 | ```lean4 |
| #926 | theorem add_comm (a b : Nat) : a + b = b + a := by |
| #927 | ``` |
| #928 | |
| #929 | And here are the proofs: |
| #930 | ```lean4 |
| #931 | theorem add_comm (a b : Nat) : a + b = b + a := by |
| #932 | omega |
| #933 | ```"#; |
| #934 | |
| #935 | let result = extract_lean_code(input); |
| #936 | // Should only contain one instance of add_comm |
| #937 | let count = result.matches("theorem add_comm").count(); |
| #938 | assert_eq!(count, 1, "Should have exactly one add_comm theorem"); |
| #939 | // Should have the version with the proof body |
| #940 | assert!(result.contains("omega"), "Should contain the proof body"); |
| #941 | } |
| #942 | |
| #943 | #[test] |
| #944 | fn test_deduplicate_multiple_stubs_and_impls() { |
| #945 | let input = r#"Types: |
| #946 | ```lean4 |
| #947 | structure Point where |
| #948 | x : Nat |
| #949 | y : Nat |
| #950 | ``` |
| #951 | |
| #952 | Theorem stubs: |
| #953 | ```lean4 |
| #954 | theorem point_eq (p : Point) : p.x + p.y = p.y + p.x := by |
| #955 | ``` |
| #956 | |
| #957 | Proofs: |
| #958 | ```lean4 |
| #959 | theorem point_eq (p : Point) : p.x + p.y = p.y + p.x := by |
| #960 | omega |
| #961 | ```"#; |
| #962 | |
| #963 | let result = extract_lean_code(input); |
| #964 | let theorem_count = result.matches("theorem point_eq").count(); |
| #965 | assert_eq!(theorem_count, 1, "Should have exactly one point_eq theorem"); |
| #966 | let struct_count = result.matches("structure Point").count(); |
| #967 | assert_eq!(struct_count, 1, "Should have exactly one Point structure"); |
| #968 | assert!(result.contains("omega")); |
| #969 | } |
| #970 | } |
| #971 |