repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
/**
 * Okapi BM25 ranking over a fixed corpus of pre-tokenized documents.
 *
 * Each document is an array of terms. All corpus statistics (document
 * lengths, per-document term frequencies, document frequencies and IDF
 * weights) are computed once at construction time, so later mutation of the
 * arrays passed in is not reflected in the scores.
 */
export class BM25 {
  private readonly documents: string[][];
  private readonly k1: number;
  private readonly b: number;
  private readonly avgDocLength: number;
  private readonly docFreq: Map<string, number>;
  private readonly docLengths: number[];
  private readonly idf: Map<string, number>;
  /** Per-document term counts, index-aligned with `documents`. */
  private readonly termFreqs: Map<string, number>[];

  /**
   * @param documents Corpus to index; each entry is one document's terms.
   * @param k1 Term-frequency saturation parameter (0 ignores repeat counts).
   * @param b Length-normalisation strength (0 = none, 1 = full).
   */
  constructor(documents: string[][], k1 = 1.5, b = 0.75) {
    this.documents = documents;
    this.k1 = k1;
    this.b = b;
    this.docLengths = documents.map((doc) => doc.length);

    const totalLength = this.docLengths.reduce((sum, len) => sum + len, 0);
    // An empty corpus would otherwise give 0 / 0 = NaN.
    this.avgDocLength =
      documents.length > 0 ? totalLength / documents.length : 0;

    // Count terms once per document so scoring is a map lookup per query
    // term instead of a full scan of the document.
    this.termFreqs = documents.map((doc) => {
      const counts = new Map<string, number>();
      for (const term of doc) {
        counts.set(term, (counts.get(term) ?? 0) + 1);
      }
      return counts;
    });

    this.docFreq = new Map();
    this.idf = new Map();
    this.computeIdf();
  }

  /** Fills `docFreq` and `idf` from the per-document term counts. */
  private computeIdf(): void {
    const N = this.documents.length;

    // Document frequency: the number of documents containing each term.
    for (const counts of this.termFreqs) {
      for (const term of counts.keys()) {
        this.docFreq.set(term, (this.docFreq.get(term) ?? 0) + 1);
      }
    }

    // The "+ 1" inside the logarithm keeps every IDF weight positive, even
    // for terms that occur in more than half of the documents.
    for (const [term, freq] of this.docFreq) {
      this.idf.set(term, Math.log((N - freq + 0.5) / (freq + 0.5) + 1));
    }
  }

  /**
   * BM25 score of the document at `index` for `query`.
   *
   * Repeated query terms contribute once per occurrence.
   */
  private score(query: string[], index: number): number {
    const counts = this.termFreqs[index];
    if (counts === undefined) {
      return 0;
    }

    const docLength = this.docLengths[index] ?? 0;
    // avgDocLength is 0 only when every document is empty; no term can match
    // then, so the value chosen for the ratio is never used in a score.
    const scaledLength =
      this.avgDocLength > 0 ? (this.b * docLength) / this.avgDocLength : 0;
    const lengthNorm = this.k1 * (1 - this.b + scaledLength);

    let score = 0;
    for (const term of query) {
      const tf = counts.get(term) ?? 0;
      // A term absent from the document contributes nothing. Skipping it
      // also avoids 0 / 0 = NaN when k1 is 0, which would otherwise poison
      // the score and break the ordering in search().
      if (tf === 0) {
        continue;
      }
      const idf = this.idf.get(term) ?? 0;
      score += (idf * tf * (this.k1 + 1)) / (tf + lengthNorm);
    }

    return score;
  }

  /**
   * Ranks every document in the corpus against `query`.
   *
   * @param query Query terms, tokenized the same way as the documents.
   * @returns All documents (the same array instances passed to the
   *   constructor), best match first. Documents with equal scores keep
   *   their corpus order.
   */
  search(query: string[]): string[][] {
    return this.documents
      .map((doc, idx) => ({ doc, score: this.score(query, idx) }))
      .sort((a, b) => b.score - a.score)
      .map((item) => item.doc);
  }
}
| #65 |