From 4de39a5871294158aedb8f803a6d0ffc0817eec2 Mon Sep 17 00:00:00 2001 From: Manoj Date: Mon, 6 Oct 2025 01:36:37 +0530 Subject: [PATCH] Fix: improve knowledge graph and better recall --- apps/webapp/app/lib/model.server.ts | 5 +- .../webapp/app/services/graphModels/entity.ts | 24 +- .../app/services/graphModels/episode.ts | 24 +- .../app/services/graphModels/statement.ts | 20 +- apps/webapp/app/services/prompts/nodes.ts | 21 +- apps/webapp/app/services/prompts/normalize.ts | 44 ++- .../webapp/app/services/prompts/statements.ts | 78 +++++- apps/webapp/app/services/search.server.ts | 35 +-- apps/webapp/app/services/search/rerank.ts | 92 +++++- apps/webapp/app/services/search/utils.ts | 265 ++++++++++++------ 10 files changed, 445 insertions(+), 163 deletions(-) diff --git a/apps/webapp/app/lib/model.server.ts b/apps/webapp/app/lib/model.server.ts index 09939cf..8f53541 100644 --- a/apps/webapp/app/lib/model.server.ts +++ b/apps/webapp/app/lib/model.server.ts @@ -83,8 +83,9 @@ export async function makeModelCall( const generateTextOptions: any = {} - - console.log('complexity:', complexity, 'model:', model) + logger.info( + `complexity: ${complexity}, model: ${model}`, + ); switch (model) { case "gpt-4.1-2025-04-14": case "gpt-4.1-mini-2025-04-14": diff --git a/apps/webapp/app/services/graphModels/entity.ts b/apps/webapp/app/services/graphModels/entity.ts index fa216a5..e71326c 100644 --- a/apps/webapp/app/services/graphModels/entity.ts +++ b/apps/webapp/app/services/graphModels/entity.ts @@ -76,16 +76,19 @@ export async function findSimilarEntities(params: { threshold: number; userId: string; }): Promise { + const limit = params.limit || 5; const query = ` - CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding) - YIELD node AS entity, score + CALL db.index.vector.queryNodes('entity_embedding', ${limit*2}, $queryEmbedding) + YIELD node AS entity + WHERE entity.userId = $userId + WITH entity, gds.similarity.cosine(entity.nameEmbedding, $queryEmbedding) AS score WHERE score >= $threshold - AND entity.userId = $userId RETURN entity, score ORDER BY score DESC + LIMIT ${limit} `; - const result = await runQuery(query, { ...params, topK: params.limit }); + const result = await runQuery(query, { ...params }); return result.map((record) => { const entity = record.get("entity").properties; @@ -110,17 +113,20 @@ export async function findSimilarEntitiesWithSameType(params: { threshold: number; userId: string; }): Promise { + const limit = params.limit || 5; const query = ` - CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding) - YIELD node AS entity, score - WHERE score >= $threshold - AND entity.userId = $userId + CALL db.index.vector.queryNodes('entity_embedding', ${limit*2}, $queryEmbedding) + YIELD node AS entity + WHERE entity.userId = $userId AND entity.type = $entityType + WITH entity, gds.similarity.cosine(entity.nameEmbedding, $queryEmbedding) AS score + WHERE score >= $threshold RETURN entity, score ORDER BY score DESC + LIMIT ${limit} `; - const result = await runQuery(query, { ...params, topK: params.limit }); + const result = await runQuery(query, { ...params }); return result.map((record) => { const entity = record.get("entity").properties; diff --git a/apps/webapp/app/services/graphModels/episode.ts b/apps/webapp/app/services/graphModels/episode.ts index 654c662..4e9875d 100644 --- a/apps/webapp/app/services/graphModels/episode.ts +++ b/apps/webapp/app/services/graphModels/episode.ts @@ -138,19 +138,21 @@ export async function 
searchEpisodesByEmbedding(params: { limit?: number; minSimilarity?: number; }) { + const limit = params.limit || 100; const query = ` - CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding) - YIELD node AS episode, score + CALL db.index.vector.queryNodes('episode_embedding', ${limit*2}, $embedding) + YIELD node AS episode WHERE episode.userId = $userId - AND score >= $minSimilarity + WITH episode, gds.similarity.cosine(episode.contentEmbedding, $embedding) AS score + WHERE score >= $minSimilarity RETURN episode, score - ORDER BY score DESC`; + ORDER BY score DESC + LIMIT ${limit}`; const result = await runQuery(query, { embedding: params.embedding, minSimilarity: params.minSimilarity, userId: params.userId, - topK: 100, }); if (!result || result.length === 0) { @@ -281,20 +283,22 @@ export async function getRelatedEpisodesEntities(params: { limit?: number; minSimilarity?: number; }) { + const limit = params.limit || 100; const query = ` - CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding) - YIELD node AS episode, score + CALL db.index.vector.queryNodes('episode_embedding', ${limit*2}, $embedding) + YIELD node AS episode WHERE episode.userId = $userId - AND score >= $minSimilarity + WITH episode, gds.similarity.cosine(episode.contentEmbedding, $embedding) AS score + WHERE score >= $minSimilarity OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity) WHERE entity IS NOT NULL - RETURN DISTINCT entity`; + RETURN DISTINCT entity + LIMIT ${limit}`; const result = await runQuery(query, { embedding: params.embedding, minSimilarity: params.minSimilarity, userId: params.userId, - topK: params.limit || 100, }); return result diff --git a/apps/webapp/app/services/graphModels/statement.ts b/apps/webapp/app/services/graphModels/statement.ts index 35411b3..cc43cd7 100644 --- a/apps/webapp/app/services/graphModels/statement.ts +++ b/apps/webapp/app/services/graphModels/statement.ts @@ -211,15 +211,18 @@ export async function findSimilarStatements({ excludeIds?: string[]; userId: string; }): Promise[]> { + const limit = 100; const query = ` - CALL db.index.vector.queryNodes('statement_embedding', $topK, $factEmbedding) - YIELD node AS statement, score + CALL db.index.vector.queryNodes('statement_embedding', ${limit*2}, $factEmbedding) + YIELD node AS statement WHERE statement.userId = $userId AND statement.invalidAt IS NULL - AND score >= $threshold ${excludeIds.length > 0 ? 
"AND NOT statement.uuid IN $excludeIds" : ""} + WITH statement, gds.similarity.cosine(statement.factEmbedding, $factEmbedding) AS score + WHERE score >= $threshold RETURN statement, score ORDER BY score DESC + LIMIT ${limit} `; const result = await runQuery(query, { @@ -227,7 +230,6 @@ export async function findSimilarStatements({ threshold, excludeIds, userId, - topK: 100, }); if (!result || result.length === 0) { @@ -410,14 +412,17 @@ export async function searchStatementsByEmbedding(params: { limit?: number; minSimilarity?: number; }) { + const limit = params.limit || 100; const query = ` - CALL db.index.vector.queryNodes('statement_embedding', $topK, $embedding) - YIELD node AS statement, score + CALL db.index.vector.queryNodes('statement_embedding', ${limit*2}, $embedding) + YIELD node AS statement WHERE statement.userId = $userId AND statement.invalidAt IS NULL - AND score >= $minSimilarity + WITH statement, gds.similarity.cosine(statement.factEmbedding, $embedding) AS score + WHERE score >= $minSimilarity RETURN statement, score ORDER BY score DESC + LIMIT ${limit} `; const result = await runQuery(query, { @@ -425,7 +430,6 @@ export async function searchStatementsByEmbedding(params: { minSimilarity: params.minSimilarity, limit: params.limit, userId: params.userId, - topK: params.limit || 100, }); if (!result || result.length === 0) { diff --git a/apps/webapp/app/services/prompts/nodes.ts b/apps/webapp/app/services/prompts/nodes.ts index f189ed3..020f622 100644 --- a/apps/webapp/app/services/prompts/nodes.ts +++ b/apps/webapp/app/services/prompts/nodes.ts @@ -78,7 +78,24 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr - Do NOT extract absolute dates, timestamps, or specific time points—these will be handled separately. - Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm"). -8. **Entity Name Extraction**: +8. **Entity Usefulness Test - SELECTIVITY FILTER**: + Before extracting an entity, ask: "Would this be useful in a search query?" + + ✅ EXTRACT (Searchable, persistent concepts): + - Named entities: "Sarah", "OpenAI", "Boston", "Albert Heijn" + - Domain concepts: "Preferences", "Home Address", "Annual Review", "Coding Practice" + - Measurements: "10/10 rating", "$2.5 million", "75% completion" + - Abstract concepts: "Lean Startup", "DevOps Culture", "Quality Standards" + + ❌ SKIP (Transient descriptors, low search value): + - Descriptive phrases: "new files", "existing code", "good practice", "necessary changes" + - Generic qualifiers: "better approach", "current version", "recent updates" + - Verb phrases: "creating documentation", "editing files", "avoiding mistakes" + - Adjective+noun combinations without specificity: "important meeting", "quick fix" + + **GUIDELINE**: Extract stable concepts that persist across contexts. Skip ephemeral descriptors tied to single actions. + +9. **Entity Name Extraction**: - Extract ONLY the core entity name, WITHOUT any descriptors or qualifiers - When text mentions "Tesla car", extract TWO entities: "Tesla" AND "Car" - When text mentions "memory space system", extract "Memory", "Space", AND "System" as separate entities @@ -87,7 +104,7 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr - **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John") - **CONCEPT NORMALIZATION**: Convert to singular form where appropriate ("spaces" → "Space") -9. **Temporal and Relationship Context Extraction**: +10. 
**Temporal and Relationship Context Extraction**: - EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years") - EXTRACT temporal context that anchors relationships ("since moving", "after graduation", "during college") - EXTRACT relationship qualifiers ("close friends", "support system", "work team", "family members") diff --git a/apps/webapp/app/services/prompts/normalize.ts b/apps/webapp/app/services/prompts/normalize.ts index 027a4f0..df92f21 100644 --- a/apps/webapp/app/services/prompts/normalize.ts +++ b/apps/webapp/app/services/prompts/normalize.ts @@ -11,19 +11,31 @@ CRITICAL: CAPTURE ALL DISTINCT PIECES OF INFORMATION. Every separate fact, prefe OUTPUT GUIDELINES: - Simple content (1-2 facts): Use 1-2 concise sentences -- Complex content (multiple facts/categories): Use structured paragraphs organized by topic +- Complex content (multiple facts/categories): Use multiple focused paragraphs, each covering ONE topic area - Technical content: Preserve specifications, commands, paths, version numbers, configurations - Let content complexity determine output length - completeness over arbitrary brevity +- IMPORTANT: Break complex content into digestible paragraphs with natural sentence boundaries for easier fact extraction 1. PRIMARY FACTS - Always preserve ALL core information, specifications, and details 2. SPEAKER ATTRIBUTION - When content contains self-introductions ("I'm X", "My name is Y"), explicitly preserve speaker identity in third person (e.g., "the user introduced themselves as X" or "X introduced himself/herself") 3. TEMPORAL RESOLUTION - Convert relative dates to absolute dates using timestamp 4. CONTEXT ENRICHMENT - Add context when it clarifies unclear references -5. VISUAL CONTENT - Capture exact text on signs, objects shown, specific details from images -6. EMOTIONAL PRESERVATION - Maintain tone and feeling of emotional exchanges -7. TECHNICAL CONTENT - Preserve commands, paths, version numbers, configurations, procedures -8. STRUCTURED CONTENT - Maintain hierarchy, lists, categories, relationships +5. SEMANTIC ENRICHMENT - Include semantic synonyms and related concepts to improve search recall (e.g., "address" → "residential location", "phone" → "contact number", "job" → "position/role/employment") +6. ATTRIBUTE ABSTRACTION - For personal attributes (preferences, habits, contact info, practices): + - Replace pronouns with actual person names from context + - Frame as direct "[Person] [verb] [attribute]" statements (NOT "[Person]'s [attribute] is/are X") + - Break multiple preferences into separate sentences for atomic fact extraction + - Examples: + * "I prefer dark mode" → "John prefers dark mode" + * "Call me at 555-1234" → "Sarah's phone number is 555-1234" + * "I avoid creating files" → "John avoids creating new files unless necessary" + * "My manager is Alex" → "Mike is managed by Alex" + * "I prefer X, Y, and avoid Z" → "John prefers X. John prefers Y. John avoids Z." +7. VISUAL CONTENT - Capture exact text on signs, objects shown, specific details from images +8. EMOTIONAL PRESERVATION - Maintain tone and feeling of emotional exchanges +9. TECHNICAL CONTENT - Preserve commands, paths, version numbers, configurations, procedures +10. 
STRUCTURED CONTENT - Maintain hierarchy, lists, categories, relationships CONTENT-ADAPTIVE APPROACH: - Conversations: Focus on dialogue context, relationships, emotional tone @@ -166,6 +178,28 @@ SIMPLE CONVERSATION - EMOTIONAL SUPPORT: - Enriched: "On May 25, 2023, Melanie encouraged Caroline about her adoption plans, affirming she would be an awesome mother." - Why: Simple temporal context, preserve emotional tone, no historical dumping +SEMANTIC ENRICHMENT FOR BETTER SEARCH: +- Original: "My address is 123 Main St. Boston, MA 02101" +- Enriched: "On October 3, 2025, the user's residential address (home location) is 123 Main St. Boston, MA 02101." +- Why: "residential address" and "home location" as synonyms improve semantic search for queries like "where does user live" or "residential location" + +- Original: "Call me at 555-1234" +- Enriched: "On October 3, 2025, the user's phone number (contact number) is 555-1234." +- Why: "phone number" and "contact number" as synonyms help queries like "how to contact" or "telephone" + +ATTRIBUTE ABSTRACTION FOR BETTER GRAPH RELATIONSHIPS: +- Original: "I avoid creating new files unless necessary" +- Enriched: "On October 3, 2025, John has a coding practice: avoid creating new files unless necessary." +- Why: Creates direct relationship from person to practice for better graph traversal + +- Original: "I prefer editing existing code over writing new code" +- Enriched: "On October 3, 2025, John prefers editing existing code over writing new code." +- Why: Direct preference relationship enables queries like "what are John's preferences" + +- Original: "My manager is Sarah" +- Enriched: "On October 3, 2025, Alex is managed by Sarah." +- Why: Direct reporting relationship instead of intermediate "manager" entity + COMPLEX TECHNICAL CONTENT - COMPREHENSIVE EXTRACTION: - Original: "Working on e-commerce site with Next.js 14. Run pnpm dev to start at port 3000. Using Prisma with PostgreSQL, Stripe for payments, Redis for caching. API routes in /api/*, database migrations in /prisma/migrations." - Enriched: "On January 15, 2024, the user is developing an e-commerce site built with Next.js 14. Development setup: pnpm dev starts local server on port 3000. Technology stack: Prisma ORM with PostgreSQL database, Stripe integration for payment processing, Redis for caching. Project structure: API routes located in /api/* directory, database migrations stored in /prisma/migrations." diff --git a/apps/webapp/app/services/prompts/statements.ts b/apps/webapp/app/services/prompts/statements.ts index a285907..50d9675 100644 --- a/apps/webapp/app/services/prompts/statements.ts +++ b/apps/webapp/app/services/prompts/statements.ts @@ -72,6 +72,53 @@ For each entity, systematically check these common patterns: - Complex multi-hop inferences - Implicit relationships requiring interpretation +## DIRECT RELATIONSHIP PRIORITY + +ALWAYS create direct subject→predicate→object relationships. Avoid intermediate container entities that add unnecessary graph hops. 
+ +✅ PREFERRED (1-hop traversal, optimal recall): +- "Sarah's manager is Mike" → Sarah → managed_by → Mike +- "Alex prefers dark mode" → Alex → prefers → "dark mode" +- "Office in Boston" → Office → located_in → Boston +- "User avoids creating files" → User → avoids → "creating new files" +- "Home address is 123 Main St" → User → has_home_address → "123 Main St, Boston" + +❌ AVOID (2-hop traversal, poor recall): +- Sarah → has → Manager [then] Manager → is → Mike (adds extra hop) +- Alex → has → Preferences [then] Preferences → includes → "dark mode" (adds extra hop) +- Office → has → Location [then] Location → is_in → Boston (adds extra hop) + +## ATOMIC BUT CONTEXTUAL FACTS + +When extracting facts about preferences, practices, habits, or context-specific information, ALWAYS include the scope/context directly in the fact statement itself. This ensures atomic facts retain their contextual boundaries. + +✅ GOOD (Atomic + Contextual): +- "Sarah prefers morning workouts at the gym" +- "Family orders pizza for Friday movie nights" +- "Alex drinks green tea when working late" +- "Doctor recommends stretching exercises for back pain" +- "Team celebrates birthdays with lunch outings" +- "Maria reads fiction books during vacation" + +❌ BAD (Atomic but Decontextualized - loses scope): +- "Sarah prefers morning workouts" (where? at home? at gym? outdoors?) +- "Family orders pizza" (when? weekends? special occasions? always?) +- "Alex drinks green tea" (when? all day? specific times? why?) +- "Doctor recommends stretching" (for what? general health? specific condition?) +- "Team celebrates birthdays" (how? where? what tradition?) +- "Maria reads fiction books" (when? always? specific contexts?) + +**Guideline**: If a preference, practice, habit, or recommendation applies to a specific context (time, place, situation, purpose, condition), embed that context in the natural language fact so the atomic statement preserves its boundaries. + +**Intermediate Entity Exception**: Only create intermediate entities if they represent meaningful concepts with multiple distinct properties: +- ✅ "Employment Contract 2024" (has salary, duration, benefits, start_date, role, etc.) +- ✅ "Annual Performance Review" (has ratings, achievements, goals, feedback, etc.) +- ❌ "User Preferences" (just a container for preference values - use direct User → prefers → X) +- ❌ "Manager" (just points to a person - use direct Sarah → managed_by → Mike) +- ❌ "Home Address" (just holds an address - use direct User → has_home_address → "address") + +**Guideline**: If the intermediate entity would have only 1-2 properties, make it a direct relationship instead. + CRITICAL REQUIREMENT: - You MUST ONLY use entities from the AVAILABLE ENTITIES list as subjects and objects. - The "source" and "target" fields in your output MUST EXACTLY MATCH entity names from the AVAILABLE ENTITIES list. 
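Editor's note, for illustration only (not part of the patch): a minimal sketch of the triple shape this prompt steers the extractor toward, contrasting the preferred direct 1-hop form with the discouraged intermediate-entity 2-hop form described above. The `Triple` interface and its field names are assumptions made for this example, not the project's actual schema; the sample values reuse the Sarah/Mike relationship from the guidance above.

```typescript
// Hypothetical triple shape, used only to illustrate the extraction guidance above.
interface Triple {
  source: string;    // must be an entity name from AVAILABLE ENTITIES
  predicate: string; // descriptive relationship phrase
  target: string;    // must be an entity name from AVAILABLE ENTITIES
}

// Preferred: one direct statement, a single hop at query time.
const direct: Triple[] = [
  { source: "Sarah", predicate: "managed_by", target: "Mike" },
];

// Discouraged: an intermediate "Manager" container entity adds an extra hop
// and dilutes recall for questions like "who is Sarah's manager?".
const intermediate: Triple[] = [
  { source: "Sarah", predicate: "has", target: "Manager" },
  { source: "Manager", predicate: "is", target: "Mike" },
];
```

With the direct form, a question such as "who manages Sarah?" resolves through a single relationship, which is the recall benefit the section above is aiming for.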
@@ -102,15 +149,6 @@ Follow these instructions: - predicate: The relationship type (can be a descriptive phrase) - target: The object entity (MUST be from AVAILABLE ENTITIES) -## SAME-NAME ENTITY RELATIONSHIP FORMATION -When entities share identical names but have different types, CREATE explicit relationship statements: -- **Person-Organization**: "John (Person)" → "owns", "founded", "works for", or "leads" → "John (Company)" -- **Person-Location**: "Smith (Person)" → "lives in", "founded", or "is associated with" → "Smith (City)" -- **Event-Location**: "Conference (Event)" → "takes place at" or "is hosted by" → "Conference (Venue)" -- **Product-Company**: "Tesla (Product)" → "is manufactured by" or "is developed by" → "Tesla (Company)" -- **MANDATORY**: Always create at least one relationship statement for same-name entities -- **CONTEXT-DRIVEN**: Choose predicates that accurately reflect the most likely relationship based on available context - ## DURATION AND TEMPORAL CONTEXT ENTITY USAGE When Duration or TemporalContext entities are available in AVAILABLE ENTITIES: - **Duration entities** (e.g., "4 years", "2 months") should be used as "duration" attributes in relationship statements @@ -307,6 +345,28 @@ Extract the basic semantic backbone that answers: WHO, WHAT, WHERE, WHEN, WHY, H **Reference**: Document → references → Entity **Employment**: Person → works_for → Organization +## ATOMIC BUT CONTEXTUAL FACTS + +When extracting facts about preferences, practices, habits, or context-specific information, ALWAYS include the scope/context directly in the fact statement itself. This ensures atomic facts retain their contextual boundaries. + +✅ GOOD (Atomic + Contextual): +- "Sarah prefers morning workouts at the gym" +- "Family orders pizza for Friday movie nights" +- "Alex drinks green tea when working late" +- "Doctor recommends stretching exercises for back pain" +- "Team celebrates birthdays with lunch outings" +- "Maria reads fiction books during vacation" + +❌ BAD (Atomic but Decontextualized - loses scope): +- "Sarah prefers morning workouts" (where? at home? at gym? outdoors?) +- "Family orders pizza" (when? weekends? special occasions? always?) +- "Alex drinks green tea" (when? all day? specific times? why?) +- "Doctor recommends stretching" (for what? general health? specific condition?) +- "Team celebrates birthdays" (how? where? what tradition?) +- "Maria reads fiction books" (when? always? specific contexts?) + +**Guideline**: If a preference, practice, habit, or recommendation applies to a specific context (time, place, situation, purpose, condition), embed that context in the natural language fact so the atomic statement preserves its boundaries. 
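Editor's note, for illustration only (not part of the patch): a small sketch of how an atomic fact can stay contextual, as the section above asks: the triple remains direct while the scope ("at the gym") lives in the fact text that gets embedded. The `StatementDraft` shape is a hypothetical stand-in for this example, not the project's actual statement type.

```typescript
// Hypothetical draft of a statement: direct triple plus a contextual fact string.
interface StatementDraft {
  fact: string;      // natural-language fact, embedded for vector search
  source: string;
  predicate: string;
  target: string;
}

const contextual: StatementDraft = {
  fact: "Sarah prefers morning workouts at the gym",
  source: "Sarah",
  predicate: "prefers",
  target: "Morning Workouts",
};

// A decontextualized variant ("Sarah prefers morning workouts") would drop the
// location scope and match queries about gym habits less reliably.
```

Keeping the scope inside the embedded fact string is what lets an otherwise atomic statement still answer scoped queries at search time.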
+ ## RELATIONSHIP QUALITY HIERARCHY ## RELATIONSHIP TEMPLATES (High Priority) diff --git a/apps/webapp/app/services/search.server.ts b/apps/webapp/app/services/search.server.ts index 3be654a..6e14c8b 100644 --- a/apps/webapp/app/services/search.server.ts +++ b/apps/webapp/app/services/search.server.ts @@ -1,9 +1,7 @@ import type { EpisodicNode, StatementNode } from "@core/types"; import { logger } from "./logger.service"; import { - applyCohereReranking, - applyCrossEncoderReranking, - applyMultiFactorMMRReranking, + applyLLMReranking, } from "./search/rerank"; import { getEpisodesByStatements, @@ -14,7 +12,6 @@ import { import { getEmbedding } from "~/lib/model.server"; import { prisma } from "~/db.server"; import { runQuery } from "~/lib/neo4j.server"; -import { env } from "~/env.server"; /** * SearchService provides methods to search the reified + temporal knowledge graph @@ -41,7 +38,7 @@ export class SearchService { // Default options const opts: Required = { - limit: options.limit || 10, + limit: options.limit || 100, maxBfsDepth: options.maxBfsDepth || 4, validAt: options.validAt || new Date(), startTime: options.startTime || null, @@ -61,7 +58,7 @@ export class SearchService { const [bm25Results, vectorResults, bfsResults] = await Promise.all([ performBM25Search(query, userId, opts), performVectorSearch(queryVector, userId, opts), - performBfsSearch(queryVector, userId, opts), + performBfsSearch(query, queryVector, userId, opts), ]); logger.info( @@ -77,7 +74,6 @@ export class SearchService { // // 3. Apply adaptive filtering based on score threshold and minimum count const filteredResults = this.applyAdaptiveFiltering(rankedStatements, opts); - // const filteredResults = rankedStatements; // 3. Return top results const episodes = await getEpisodesByStatements(filteredResults.map((item) => item.statement)); @@ -234,31 +230,8 @@ export class SearchService { }, options: Required, ): Promise { - // Count non-empty result sources - const nonEmptySources = [ - results.bm25.length > 0, - results.vector.length > 0, - results.bfs.length > 0, - ].filter(Boolean).length; - if (env.COHERE_API_KEY) { - logger.info("Using Cohere reranking"); - return applyCohereReranking(query, results, options); - } - - // If results are coming from only one source, use cross-encoder reranking - if (nonEmptySources <= 1) { - logger.info( - "Only one source has results, falling back to cross-encoder reranking", - ); - return applyCrossEncoderReranking(query, results); - } - - // Otherwise use combined MultiFactorReranking + MMR for multiple sources - return applyMultiFactorMMRReranking(results, { - lambda: 0.7, // Balance relevance (0.7) vs diversity (0.3) - maxResults: options.limit > 0 ? 
options.limit * 2 : 100, // Get more results for filtering
-    });
+    return applyLLMReranking(query, results);
   }

   private async logRecallAsync(
diff --git a/apps/webapp/app/services/search/rerank.ts b/apps/webapp/app/services/search/rerank.ts
index 2eb9a07..b29f41e 100644
--- a/apps/webapp/app/services/search/rerank.ts
+++ b/apps/webapp/app/services/search/rerank.ts
@@ -442,6 +442,87 @@ export function applyMultiFactorReranking(results: {
   return sortedResults;
 }

+/**
+ * Apply LLM-based reranking for contextual understanding
+ * Uses GPT-4o-mini to verify relevance with semantic reasoning
+ */
+export async function applyLLMReranking(
+  query: string,
+  results: {
+    bm25: StatementNode[];
+    vector: StatementNode[];
+    bfs: StatementNode[];
+  },
+  limit: number = 10,
+): Promise<StatementNode[]> {
+  const allResults = [
+    ...results.bm25.slice(0, 100),
+    ...results.vector.slice(0, 100),
+    ...results.bfs.slice(0, 100),
+  ];
+  const uniqueResults = combineAndDeduplicateStatements(allResults);
+  logger.info(`Unique results: ${uniqueResults.length}`);
+
+  if (uniqueResults.length === 0) {
+    logger.info("No results to rerank with LLM reranking");
+    return [];
+  }
+
+  const prompt = `You are a relevance filter. Given a user query and a list of facts, identify ONLY the facts that are truly relevant to answering the query.
+
+Query: "${query}"
+
+Facts:
+${uniqueResults.map((r, i) => `${i}. ${r.fact}`).join('\n')}
+
+Instructions:
+- A fact is RELEVANT if it directly answers or provides information needed to answer the query
+- A fact is NOT RELEVANT if it's tangentially related but doesn't answer the query
+- Consider semantic meaning, not just keyword matching
+- Only return facts with HIGH relevance (≥80% confidence)
+- If you are not sure, return an empty array
+
+Output format:
+<output>[1, 5, 7]</output>
+
+Return ONLY the numbers of highly relevant facts inside <output> tags as a JSON array:`;
+
+  try {
+    let responseText = "";
+    await makeModelCall(
+      false,
+      [{ role: "user", content: prompt }],
+      (text) => { responseText = text; },
+      { temperature: 0 },
+      'high'
+    );
+
+    // Extract array from <output>[1, 5, 7]</output>
+    const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
+    if (outputMatch && outputMatch[1]) {
+      responseText = outputMatch[1].trim();
+      const parsedResponse = JSON.parse(responseText || "[]");
+      const extractedIndices = Array.isArray(parsedResponse) ?
parsedResponse : (parsedResponse.entities || []); + + + if (extractedIndices.length === 0) { + logger.warn("LLM reranking returned no valid indices, falling back to original order"); + return []; + } + + logger.info(`LLM reranking selected ${extractedIndices.length} relevant facts`); + const selected = extractedIndices.map((i: number) => uniqueResults[i]); + return selected; + } + + return uniqueResults.slice(0, limit); + } catch (error) { + logger.error("LLM reranking failed, falling back to original order:", { error }); + return uniqueResults.slice(0, limit); + } +} + /** * Apply Cohere Rerank 3.5 to search results for improved question-to-fact matching * This is particularly effective for bridging the semantic gap between questions and factual statements @@ -456,6 +537,7 @@ export async function applyCohereReranking( options?: { limit?: number; model?: string; + useLLMVerification?: boolean; }, ): Promise { const { model = "rerank-v3.5" } = options || {}; @@ -491,10 +573,13 @@ export async function applyCohereReranking( // Prepare documents for Cohere API const documents = uniqueResults.map((statement) => statement.fact); + console.log("Documents:", documents); logger.info( `Cohere reranking ${documents.length} statements with model ${model}`, ); + logger.info(`Cohere query: "${query}"`); + logger.info(`First 5 documents: ${documents.slice(0, 5).join(' | ')}`); // Call Cohere Rerank API const response = await cohere.rerank({ @@ -506,6 +591,11 @@ export async function applyCohereReranking( console.log("Cohere reranking billed units:", response.meta?.billedUnits); + // Log top 5 Cohere results for debugging + logger.info(`Cohere top 5 results:\n${response.results.slice(0, 5).map((r, i) => + ` ${i + 1}. [${r.relevanceScore.toFixed(4)}] ${documents[r.index].substring(0, 80)}...` + ).join('\n')}`); + // Map results back to StatementNodes with Cohere scores const rerankedResults = response.results .map((result, index) => ({ @@ -513,7 +603,7 @@ export async function applyCohereReranking( cohereScore: result.relevanceScore, cohereRank: index + 1, })) - .filter((result) => result.cohereScore >= Number(env.COHERE_SCORE_THRESHOLD)); + // .filter((result) => result.cohereScore >= Number(env.COHERE_SCORE_THRESHOLD)); const responseTime = Date.now() - startTime; logger.info( diff --git a/apps/webapp/app/services/search/utils.ts b/apps/webapp/app/services/search/utils.ts index a63debe..012d175 100644 --- a/apps/webapp/app/services/search/utils.ts +++ b/apps/webapp/app/services/search/utils.ts @@ -3,6 +3,8 @@ import type { SearchOptions } from "../search.server"; import type { Embedding } from "ai"; import { logger } from "../logger.service"; import { runQuery } from "~/lib/neo4j.server"; +import { getEmbedding } from "~/lib/model.server"; +import { findSimilarEntities } from "../graphModels/entity"; /** * Perform BM25 keyword-based search on statements @@ -129,25 +131,26 @@ export async function performVectorSearch( `; } - // 1. Search for similar statements using Neo4j vector search with provenance count + const limit = options.limit || 100; + // 1. 
Search for similar statements using GDS cosine similarity with provenance count const cypher = ` - CALL db.index.vector.queryNodes('statement_embedding', $topk, $embedding) - YIELD node AS s, score + MATCH (s:Statement) WHERE s.userId = $userId - AND score >= 0.7 ${timeframeCondition} ${spaceCondition} + WITH s, gds.similarity.cosine(s.factEmbedding, $embedding) AS score + WHERE score >= 0.5 OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s) WITH s, score, count(episode) as provenanceCount RETURN s, score, provenanceCount ORDER BY score DESC + LIMIT ${limit} `; const params = { embedding: query, userId, validAt: options.endTime.toISOString(), - topk: options.limit || 100, ...(options.startTime && { startTime: options.startTime.toISOString() }), ...(options.spaceIds.length > 0 && { spaceIds: options.spaceIds }), }; @@ -170,133 +173,223 @@ export async function performVectorSearch( /** * Perform BFS traversal starting from entities mentioned in the query + * Uses guided search with semantic filtering to reduce noise */ export async function performBfsSearch( + query: string, embedding: Embedding, userId: string, options: Required, ): Promise { try { - // 1. Extract potential entities from query - const entities = await extractEntitiesFromQuery(embedding, userId); + // 1. Extract potential entities from query using chunked embeddings + const entities = await extractEntitiesFromQuery(query, userId); - // 2. For each entity, perform BFS traversal - const allStatements: StatementNode[] = []; - - for (const entity of entities) { - const statements = await bfsTraversal( - entity.uuid, - options.maxBfsDepth, - options.endTime, - userId, - options.includeInvalidated, - options.startTime, - options.spaceIds, - ); - allStatements.push(...statements); + if (entities.length === 0) { + return []; } - return allStatements; + // 2. Perform guided BFS with semantic filtering + const statements = await bfsTraversal( + entities, + embedding, + options.maxBfsDepth || 3, + options.endTime, + userId, + options.includeInvalidated, + options.startTime, + ); + + // Return individual statements + return statements; } catch (error) { logger.error("BFS search error:", { error }); return []; } } + /** - * Perform BFS traversal starting from an entity + * Iterative BFS traversal - explores up to 3 hops level-by-level using Neo4j cosine similarity */ -export async function bfsTraversal( - startEntityId: string, +async function bfsTraversal( + startEntities: EntityNode[], + queryEmbedding: Embedding, maxDepth: number, validAt: Date, userId: string, includeInvalidated: boolean, startTime: Date | null, - spaceIds: string[] = [], ): Promise { - try { - // Build the WHERE clause based on timeframe options - let timeframeCondition = ` - AND s.validAt <= $validAt - ${includeInvalidated ? '' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'} - `; + const RELEVANCE_THRESHOLD = 0.5; + const EXPLORATION_THRESHOLD = 0.3; - // If startTime is provided, add condition to filter by validAt >= startTime - if (startTime) { - timeframeCondition = ` - AND s.validAt <= $validAt - ${includeInvalidated ? 
'' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'} - AND s.validAt >= $startTime - `; - } + const allStatements = new Map(); // uuid -> relevance + const visitedEntities = new Set(); - // Add space filtering if spaceIds are provided - let spaceCondition = ""; - if (spaceIds.length > 0) { - spaceCondition = ` - AND s.spaceIds IS NOT NULL AND ANY(spaceId IN $spaceIds WHERE spaceId IN s.spaceIds) - `; - } + // Track entities per level for iterative BFS + let currentLevelEntities = startEntities.map(e => e.uuid); - // Use Neo4j's built-in path finding capabilities for efficient BFS - // This query implements BFS up to maxDepth and collects all statements along the way + // Timeframe condition for temporal filtering + let timeframeCondition = ` + AND s.validAt <= $validAt + ${includeInvalidated ? '' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'} + `; + if (startTime) { + timeframeCondition += ` AND s.validAt >= $startTime`; + } + + // Process each depth level + for (let depth = 0; depth < maxDepth; depth++) { + if (currentLevelEntities.length === 0) break; + + // Mark entities as visited at this depth + currentLevelEntities.forEach(id => visitedEntities.add(`${id}`)); + + // Get statements for current level entities with cosine similarity calculated in Neo4j const cypher = ` - MATCH (e:Entity {uuid: $startEntityId})<-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]-(s:Statement) - WHERE - (s.userId = $userId) - ${includeInvalidated ? 'AND s.validAt <= $validAt' : timeframeCondition} - ${spaceCondition} - RETURN s as statement + MATCH (e:Entity{userId: $userId})-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]-(s:Statement{userId: $userId}) + WHERE e.uuid IN $entityIds + ${timeframeCondition} + WITH DISTINCT s // Deduplicate first + WITH s, gds.similarity.cosine(s.factEmbedding, $queryEmbedding) AS relevance + WHERE relevance >= $explorationThreshold + RETURN s.uuid AS uuid, relevance + ORDER BY relevance DESC + LIMIT 200 // Cap per BFS level to avoid explosion `; - const params = { - startEntityId, - maxDepth, - validAt: validAt.toISOString(), + const records = await runQuery(cypher, { + entityIds: currentLevelEntities, userId, - includeInvalidated, + queryEmbedding, + explorationThreshold: EXPLORATION_THRESHOLD, + validAt: validAt.toISOString(), ...(startTime && { startTime: startTime.toISOString() }), - ...(spaceIds.length > 0 && { spaceIds }), - }; + }); - const records = await runQuery(cypher, params); - return records.map( - (record) => record.get("statement").properties as StatementNode, - ); - } catch (error) { - logger.error("BFS traversal error:", { error }); + // Store statement relevance scores + const currentLevelStatementUuids: string[] = []; + for (const record of records) { + const uuid = record.get("uuid"); + const relevance = record.get("relevance"); + + if (!allStatements.has(uuid)) { + allStatements.set(uuid, relevance); + currentLevelStatementUuids.push(uuid); + } + } + + // Get connected entities for next level + if (depth < maxDepth - 1 && currentLevelStatementUuids.length > 0) { + const nextCypher = ` + MATCH (s:Statement{userId: $userId})-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]->(e:Entity{userId: $userId}) + WHERE s.uuid IN $statementUuids + RETURN DISTINCT e.uuid AS entityId + `; + + const nextRecords = await runQuery(nextCypher, { + statementUuids: currentLevelStatementUuids, + userId + }); + + // Filter out already visited entities + currentLevelEntities = nextRecords + .map(r => r.get("entityId")) + .filter(id => !visitedEntities.has(`${id}`)); + + } else { + 
currentLevelEntities = []; + } + } + + // Filter by relevance threshold and fetch full statements + const relevantUuids = Array.from(allStatements.entries()) + .filter(([_, relevance]) => relevance >= RELEVANCE_THRESHOLD) + .sort((a, b) => b[1] - a[1]) + .map(([uuid]) => uuid); + + if (relevantUuids.length === 0) { return []; } + + const fetchCypher = ` + MATCH (s:Statement{userId: $userId}) + WHERE s.uuid IN $uuids + RETURN s + `; + const fetchRecords = await runQuery(fetchCypher, { uuids: relevantUuids, userId }); + const statements = fetchRecords.map(r => r.get("s").properties as StatementNode); + + logger.info( + `BFS: explored ${allStatements.size} statements across ${maxDepth} hops, returning ${statements.length} (≥${RELEVANCE_THRESHOLD})` + ); + + return statements; +} + + +/** + * Generate query chunks (individual words and bigrams) for entity extraction + */ +function generateQueryChunks(query: string): string[] { + const words = query.toLowerCase() + .trim() + .split(/\s+/) + .filter(word => word.length > 0); + + const chunks: string[] = []; + + // Add individual words (for entities like "user") + chunks.push(...words); + + // Add bigrams (for multi-word entities like "home address") + for (let i = 0; i < words.length - 1; i++) { + chunks.push(`${words[i]} ${words[i + 1]}`); + } + + // Add full query as final chunk + chunks.push(query.toLowerCase().trim()); + + return chunks; } /** - * Extract potential entities from a query using embeddings or LLM + * Extract potential entities from a query using chunked embeddings + * Chunks query into words/bigrams, embeds each chunk, finds entities for each */ export async function extractEntitiesFromQuery( - embedding: Embedding, + query: string, userId: string, ): Promise { try { - // Use vector similarity to find relevant entities - const cypher = ` - // Match entities using vector index on name embeddings - CALL db.index.vector.queryNodes('entity_embedding', 3, $embedding) - YIELD node AS e, score - WHERE e.userId = $userId - AND score > 0.7 - RETURN e - ORDER BY score DESC - `; + // Generate chunks from query + const chunks = generateQueryChunks(query); - const params = { - embedding, - userId, - }; + // Get embeddings for each chunk + const chunkEmbeddings = await Promise.all( + chunks.map(chunk => getEmbedding(chunk)) + ); - const records = await runQuery(cypher, params); + // Search for entities matching each chunk embedding + const allEntitySets = await Promise.all( + chunkEmbeddings.map(async (embedding) => { + return await findSimilarEntities({ + queryEmbedding: embedding, + limit: 3, + threshold: 0.7, + userId, + }); + }) + ); - return records.map((record) => record.get("e").properties as EntityNode); + // Flatten and deduplicate entities by ID + const allEntities = allEntitySets.flat(); + const uniqueEntities = Array.from( + new Map(allEntities.map(e => [e.uuid, e])).values() + ); + + return uniqueEntities; } catch (error) { logger.error("Entity extraction error:", { error }); return [];