Fix: improve knowledge graph and better recall

This commit is contained in:
Manoj 2025-10-06 01:36:37 +05:30
parent 7960a5ed03
commit 4de39a5871
10 changed files with 445 additions and 163 deletions

View File

@ -83,8 +83,9 @@ export async function makeModelCall(
const generateTextOptions: any = {}
console.log('complexity:', complexity, 'model:', model)
logger.info(
`complexity: ${complexity}, model: ${model}`,
);
switch (model) {
case "gpt-4.1-2025-04-14":
case "gpt-4.1-mini-2025-04-14":

View File

@ -76,16 +76,19 @@ export async function findSimilarEntities(params: {
threshold: number;
userId: string;
}): Promise<EntityNode[]> {
const limit = params.limit || 5;
const query = `
CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
YIELD node AS entity, score
CALL db.index.vector.queryNodes('entity_embedding', ${limit*2}, $queryEmbedding)
YIELD node AS entity
WHERE entity.userId = $userId
WITH entity, gds.similarity.cosine(entity.nameEmbedding, $queryEmbedding) AS score
WHERE score >= $threshold
AND entity.userId = $userId
RETURN entity, score
ORDER BY score DESC
LIMIT ${limit}
`;
const result = await runQuery(query, { ...params, topK: params.limit });
const result = await runQuery(query, { ...params });
return result.map((record) => {
const entity = record.get("entity").properties;
@ -110,17 +113,20 @@ export async function findSimilarEntitiesWithSameType(params: {
threshold: number;
userId: string;
}): Promise<EntityNode[]> {
const limit = params.limit || 5;
const query = `
CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
YIELD node AS entity, score
WHERE score >= $threshold
AND entity.userId = $userId
CALL db.index.vector.queryNodes('entity_embedding', ${limit*2}, $queryEmbedding)
YIELD node AS entity
WHERE entity.userId = $userId
AND entity.type = $entityType
WITH entity, gds.similarity.cosine(entity.nameEmbedding, $queryEmbedding) AS score
WHERE score >= $threshold
RETURN entity, score
ORDER BY score DESC
LIMIT ${limit}
`;
const result = await runQuery(query, { ...params, topK: params.limit });
const result = await runQuery(query, { ...params });
return result.map((record) => {
const entity = record.get("entity").properties;

View File

@ -138,19 +138,21 @@ export async function searchEpisodesByEmbedding(params: {
limit?: number;
minSimilarity?: number;
}) {
const limit = params.limit || 100;
const query = `
CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
YIELD node AS episode, score
CALL db.index.vector.queryNodes('episode_embedding', ${limit*2}, $embedding)
YIELD node AS episode
WHERE episode.userId = $userId
AND score >= $minSimilarity
WITH episode, gds.similarity.cosine(episode.contentEmbedding, $embedding) AS score
WHERE score >= $minSimilarity
RETURN episode, score
ORDER BY score DESC`;
ORDER BY score DESC
LIMIT ${limit}`;
const result = await runQuery(query, {
embedding: params.embedding,
minSimilarity: params.minSimilarity,
userId: params.userId,
topK: 100,
});
if (!result || result.length === 0) {
@ -281,20 +283,22 @@ export async function getRelatedEpisodesEntities(params: {
limit?: number;
minSimilarity?: number;
}) {
const limit = params.limit || 100;
const query = `
CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
YIELD node AS episode, score
CALL db.index.vector.queryNodes('episode_embedding', ${limit*2}, $embedding)
YIELD node AS episode
WHERE episode.userId = $userId
AND score >= $minSimilarity
WITH episode, gds.similarity.cosine(episode.contentEmbedding, $embedding) AS score
WHERE score >= $minSimilarity
OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity)
WHERE entity IS NOT NULL
RETURN DISTINCT entity`;
RETURN DISTINCT entity
LIMIT ${limit}`;
const result = await runQuery(query, {
embedding: params.embedding,
minSimilarity: params.minSimilarity,
userId: params.userId,
topK: params.limit || 100,
});
return result

View File

@ -211,15 +211,18 @@ export async function findSimilarStatements({
excludeIds?: string[];
userId: string;
}): Promise<Omit<StatementNode, "factEmbedding">[]> {
const limit = 100;
const query = `
CALL db.index.vector.queryNodes('statement_embedding', $topK, $factEmbedding)
YIELD node AS statement, score
CALL db.index.vector.queryNodes('statement_embedding', ${limit*2}, $factEmbedding)
YIELD node AS statement
WHERE statement.userId = $userId
AND statement.invalidAt IS NULL
AND score >= $threshold
${excludeIds.length > 0 ? "AND NOT statement.uuid IN $excludeIds" : ""}
WITH statement, gds.similarity.cosine(statement.factEmbedding, $factEmbedding) AS score
WHERE score >= $threshold
RETURN statement, score
ORDER BY score DESC
LIMIT ${limit}
`;
const result = await runQuery(query, {
@ -227,7 +230,6 @@ export async function findSimilarStatements({
threshold,
excludeIds,
userId,
topK: 100,
});
if (!result || result.length === 0) {
@ -410,14 +412,17 @@ export async function searchStatementsByEmbedding(params: {
limit?: number;
minSimilarity?: number;
}) {
const limit = params.limit || 100;
const query = `
CALL db.index.vector.queryNodes('statement_embedding', $topK, $embedding)
YIELD node AS statement, score
CALL db.index.vector.queryNodes('statement_embedding', ${limit*2}, $embedding)
YIELD node AS statement
WHERE statement.userId = $userId
AND statement.invalidAt IS NULL
AND score >= $minSimilarity
WITH statement, gds.similarity.cosine(statement.factEmbedding, $embedding) AS score
WHERE score >= $minSimilarity
RETURN statement, score
ORDER BY score DESC
LIMIT ${limit}
`;
const result = await runQuery(query, {
@ -425,7 +430,6 @@ export async function searchStatementsByEmbedding(params: {
minSimilarity: params.minSimilarity,
limit: params.limit,
userId: params.userId,
topK: params.limit || 100,
});
if (!result || result.length === 0) {

View File

@ -78,7 +78,24 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- Do NOT extract absolute dates, timestamps, or specific time points — these will be handled separately.
- Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm").
8. **Entity Name Extraction**:
8. **Entity Usefulness Test - SELECTIVITY FILTER**:
Before extracting an entity, ask: "Would this be useful in a search query?"
EXTRACT (Searchable, persistent concepts):
- Named entities: "Sarah", "OpenAI", "Boston", "Albert Heijn"
- Domain concepts: "Preferences", "Home Address", "Annual Review", "Coding Practice"
- Measurements: "10/10 rating", "$2.5 million", "75% completion"
- Abstract concepts: "Lean Startup", "DevOps Culture", "Quality Standards"
SKIP (Transient descriptors, low search value):
- Descriptive phrases: "new files", "existing code", "good practice", "necessary changes"
- Generic qualifiers: "better approach", "current version", "recent updates"
- Verb phrases: "creating documentation", "editing files", "avoiding mistakes"
- Adjective+noun combinations without specificity: "important meeting", "quick fix"
**GUIDELINE**: Extract stable concepts that persist across contexts. Skip ephemeral descriptors tied to single actions.
9. **Entity Name Extraction**:
- Extract ONLY the core entity name, WITHOUT any descriptors or qualifiers
- When text mentions "Tesla car", extract TWO entities: "Tesla" AND "Car"
- When text mentions "memory space system", extract "Memory", "Space", AND "System" as separate entities
@ -87,7 +104,7 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
- **CONCEPT NORMALIZATION**: Convert to singular form where appropriate ("spaces" → "Space")
9. **Temporal and Relationship Context Extraction**:
10. **Temporal and Relationship Context Extraction**:
- EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years")
- EXTRACT temporal context that anchors relationships ("since moving", "after graduation", "during college")
- EXTRACT relationship qualifiers ("close friends", "support system", "work team", "family members")

View File

@ -11,19 +11,31 @@ CRITICAL: CAPTURE ALL DISTINCT PIECES OF INFORMATION. Every separate fact, prefe
OUTPUT GUIDELINES:
- Simple content (1-2 facts): Use 1-2 concise sentences
- Complex content (multiple facts/categories): Use structured paragraphs organized by topic
- Complex content (multiple facts/categories): Use multiple focused paragraphs, each covering ONE topic area
- Technical content: Preserve specifications, commands, paths, version numbers, configurations
- Let content complexity determine output length - completeness over arbitrary brevity
- IMPORTANT: Break complex content into digestible paragraphs with natural sentence boundaries for easier fact extraction
<enrichment_strategy>
1. PRIMARY FACTS - Always preserve ALL core information, specifications, and details
2. SPEAKER ATTRIBUTION - When content contains self-introductions ("I'm X", "My name is Y"), explicitly preserve speaker identity in third person (e.g., "the user introduced themselves as X" or "X introduced himself/herself")
3. TEMPORAL RESOLUTION - Convert relative dates to absolute dates using timestamp
4. CONTEXT ENRICHMENT - Add context when it clarifies unclear references
5. VISUAL CONTENT - Capture exact text on signs, objects shown, specific details from images
6. EMOTIONAL PRESERVATION - Maintain tone and feeling of emotional exchanges
7. TECHNICAL CONTENT - Preserve commands, paths, version numbers, configurations, procedures
8. STRUCTURED CONTENT - Maintain hierarchy, lists, categories, relationships
5. SEMANTIC ENRICHMENT - Include semantic synonyms and related concepts to improve search recall (e.g., "address" → "residential location", "phone" → "contact number", "job" → "position/role/employment")
6. ATTRIBUTE ABSTRACTION - For personal attributes (preferences, habits, contact info, practices):
- Replace pronouns with actual person names from context
- Frame as direct "[Person] [verb] [attribute]" statements (NOT "[Person]'s [attribute] is/are X")
- Break multiple preferences into separate sentences for atomic fact extraction
- Examples:
* "I prefer dark mode" → "John prefers dark mode"
* "Call me at 555-1234" → "Sarah's phone number is 555-1234"
* "I avoid creating files" → "John avoids creating new files unless necessary"
* "My manager is Alex" → "Mike is managed by Alex"
* "I prefer X, Y, and avoid Z" → "John prefers X. John prefers Y. John avoids Z."
7. VISUAL CONTENT - Capture exact text on signs, objects shown, specific details from images
8. EMOTIONAL PRESERVATION - Maintain tone and feeling of emotional exchanges
9. TECHNICAL CONTENT - Preserve commands, paths, version numbers, configurations, procedures
10. STRUCTURED CONTENT - Maintain hierarchy, lists, categories, relationships
CONTENT-ADAPTIVE APPROACH:
- Conversations: Focus on dialogue context, relationships, emotional tone
@ -166,6 +178,28 @@ SIMPLE CONVERSATION - EMOTIONAL SUPPORT:
- Enriched: "On May 25, 2023, Melanie encouraged Caroline about her adoption plans, affirming she would be an awesome mother."
- Why: Simple temporal context, preserve emotional tone, no historical dumping
SEMANTIC ENRICHMENT FOR BETTER SEARCH:
- Original: "My address is 123 Main St. Boston, MA 02101"
- Enriched: "On October 3, 2025, the user's residential address (home location) is 123 Main St. Boston, MA 02101."
- Why: "residential address" and "home location" as synonyms improve semantic search for queries like "where does user live" or "residential location"
- Original: "Call me at 555-1234"
- Enriched: "On October 3, 2025, the user's phone number (contact number) is 555-1234."
- Why: "phone number" and "contact number" as synonyms help queries like "how to contact" or "telephone"
ATTRIBUTE ABSTRACTION FOR BETTER GRAPH RELATIONSHIPS:
- Original: "I avoid creating new files unless necessary"
- Enriched: "On October 3, 2025, John has a coding practice: avoid creating new files unless necessary."
- Why: Creates direct relationship from person to practice for better graph traversal
- Original: "I prefer editing existing code over writing new code"
- Enriched: "On October 3, 2025, John prefers editing existing code over writing new code."
- Why: Direct preference relationship enables queries like "what are John's preferences"
- Original: "My manager is Sarah"
- Enriched: "On October 3, 2025, Alex is managed by Sarah."
- Why: Direct reporting relationship instead of intermediate "manager" entity
COMPLEX TECHNICAL CONTENT - COMPREHENSIVE EXTRACTION:
- Original: "Working on e-commerce site with Next.js 14. Run pnpm dev to start at port 3000. Using Prisma with PostgreSQL, Stripe for payments, Redis for caching. API routes in /api/*, database migrations in /prisma/migrations."
- Enriched: "On January 15, 2024, the user is developing an e-commerce site built with Next.js 14. Development setup: pnpm dev starts local server on port 3000. Technology stack: Prisma ORM with PostgreSQL database, Stripe integration for payment processing, Redis for caching. Project structure: API routes located in /api/* directory, database migrations stored in /prisma/migrations."

View File

@ -72,6 +72,53 @@ For each entity, systematically check these common patterns:
- Complex multi-hop inferences
- Implicit relationships requiring interpretation
## DIRECT RELATIONSHIP PRIORITY
ALWAYS create direct subject → predicate → object relationships. Avoid intermediate container entities that add unnecessary graph hops.
PREFERRED (1-hop traversal, optimal recall):
- "Sarah's manager is Mike" → Sarah managed_by Mike
- "Alex prefers dark mode" → Alex prefers "dark mode"
- "Office in Boston" → Office located_in Boston
- "User avoids creating files" → User avoids "creating new files"
- "Home address is 123 Main St" → User has_home_address "123 Main St, Boston"
AVOID (2-hop traversal, poor recall):
- Sarah has Manager [then] Manager is Mike (adds extra hop)
- Alex has Preferences [then] Preferences includes "dark mode" (adds extra hop)
- Office has Location [then] Location is_in Boston (adds extra hop)
## ATOMIC BUT CONTEXTUAL FACTS
When extracting facts about preferences, practices, habits, or context-specific information, ALWAYS include the scope/context directly in the fact statement itself. This ensures atomic facts retain their contextual boundaries.
GOOD (Atomic + Contextual):
- "Sarah prefers morning workouts at the gym"
- "Family orders pizza for Friday movie nights"
- "Alex drinks green tea when working late"
- "Doctor recommends stretching exercises for back pain"
- "Team celebrates birthdays with lunch outings"
- "Maria reads fiction books during vacation"
BAD (Atomic but Decontextualized - loses scope):
- "Sarah prefers morning workouts" (where? at home? at gym? outdoors?)
- "Family orders pizza" (when? weekends? special occasions? always?)
- "Alex drinks green tea" (when? all day? specific times? why?)
- "Doctor recommends stretching" (for what? general health? specific condition?)
- "Team celebrates birthdays" (how? where? what tradition?)
- "Maria reads fiction books" (when? always? specific contexts?)
**Guideline**: If a preference, practice, habit, or recommendation applies to a specific context (time, place, situation, purpose, condition), embed that context in the natural language fact so the atomic statement preserves its boundaries.
**Intermediate Entity Exception**: Only create intermediate entities if they represent meaningful concepts with multiple distinct properties:
DO create:
- "Employment Contract 2024" (has salary, duration, benefits, start_date, role, etc.)
- "Annual Performance Review" (has ratings, achievements, goals, feedback, etc.)
Do NOT create:
- "User Preferences" (just a container for preference values - use direct User prefers X)
- "Manager" (just points to a person - use direct Sarah managed_by Mike)
- "Home Address" (just holds an address - use direct User has_home_address "address")
**Guideline**: If the intermediate entity would have only 1-2 properties, make it a direct relationship instead.
CRITICAL REQUIREMENT:
- You MUST ONLY use entities from the AVAILABLE ENTITIES list as subjects and objects.
- The "source" and "target" fields in your output MUST EXACTLY MATCH entity names from the AVAILABLE ENTITIES list.
@ -102,15 +149,6 @@ Follow these instructions:
- predicate: The relationship type (can be a descriptive phrase)
- target: The object entity (MUST be from AVAILABLE ENTITIES)
## SAME-NAME ENTITY RELATIONSHIP FORMATION
When entities share identical names but have different types, CREATE explicit relationship statements:
- **Person-Organization**: "John (Person)" "owns", "founded", "works for", or "leads" "John (Company)"
- **Person-Location**: "Smith (Person)" "lives in", "founded", or "is associated with" "Smith (City)"
- **Event-Location**: "Conference (Event)" "takes place at" or "is hosted by" "Conference (Venue)"
- **Product-Company**: "Tesla (Product)" "is manufactured by" or "is developed by" "Tesla (Company)"
- **MANDATORY**: Always create at least one relationship statement for same-name entities
- **CONTEXT-DRIVEN**: Choose predicates that accurately reflect the most likely relationship based on available context
## DURATION AND TEMPORAL CONTEXT ENTITY USAGE
When Duration or TemporalContext entities are available in AVAILABLE ENTITIES:
- **Duration entities** (e.g., "4 years", "2 months") should be used as "duration" attributes in relationship statements
@ -307,6 +345,28 @@ Extract the basic semantic backbone that answers: WHO, WHAT, WHERE, WHEN, WHY, H
**Reference**: Document references Entity
**Employment**: Person works_for Organization
## ATOMIC BUT CONTEXTUAL FACTS
When extracting facts about preferences, practices, habits, or context-specific information, ALWAYS include the scope/context directly in the fact statement itself. This ensures atomic facts retain their contextual boundaries.
GOOD (Atomic + Contextual):
- "Sarah prefers morning workouts at the gym"
- "Family orders pizza for Friday movie nights"
- "Alex drinks green tea when working late"
- "Doctor recommends stretching exercises for back pain"
- "Team celebrates birthdays with lunch outings"
- "Maria reads fiction books during vacation"
BAD (Atomic but Decontextualized - loses scope):
- "Sarah prefers morning workouts" (where? at home? at gym? outdoors?)
- "Family orders pizza" (when? weekends? special occasions? always?)
- "Alex drinks green tea" (when? all day? specific times? why?)
- "Doctor recommends stretching" (for what? general health? specific condition?)
- "Team celebrates birthdays" (how? where? what tradition?)
- "Maria reads fiction books" (when? always? specific contexts?)
**Guideline**: If a preference, practice, habit, or recommendation applies to a specific context (time, place, situation, purpose, condition), embed that context in the natural language fact so the atomic statement preserves its boundaries.
## RELATIONSHIP QUALITY HIERARCHY
## RELATIONSHIP TEMPLATES (High Priority)

View File

@ -1,9 +1,7 @@
import type { EpisodicNode, StatementNode } from "@core/types";
import { logger } from "./logger.service";
import {
applyCohereReranking,
applyCrossEncoderReranking,
applyMultiFactorMMRReranking,
applyLLMReranking,
} from "./search/rerank";
import {
getEpisodesByStatements,
@ -14,7 +12,6 @@ import {
import { getEmbedding } from "~/lib/model.server";
import { prisma } from "~/db.server";
import { runQuery } from "~/lib/neo4j.server";
import { env } from "~/env.server";
/**
* SearchService provides methods to search the reified + temporal knowledge graph
@ -41,7 +38,7 @@ export class SearchService {
// Default options
const opts: Required<SearchOptions> = {
limit: options.limit || 10,
limit: options.limit || 100,
maxBfsDepth: options.maxBfsDepth || 4,
validAt: options.validAt || new Date(),
startTime: options.startTime || null,
@ -61,7 +58,7 @@ export class SearchService {
const [bm25Results, vectorResults, bfsResults] = await Promise.all([
performBM25Search(query, userId, opts),
performVectorSearch(queryVector, userId, opts),
performBfsSearch(queryVector, userId, opts),
performBfsSearch(query, queryVector, userId, opts),
]);
logger.info(
@ -77,7 +74,6 @@ export class SearchService {
// // 3. Apply adaptive filtering based on score threshold and minimum count
const filteredResults = this.applyAdaptiveFiltering(rankedStatements, opts);
// const filteredResults = rankedStatements;
// 3. Return top results
const episodes = await getEpisodesByStatements(filteredResults.map((item) => item.statement));
@ -234,31 +230,8 @@ export class SearchService {
},
options: Required<SearchOptions>,
): Promise<StatementNode[]> {
// Count non-empty result sources
const nonEmptySources = [
results.bm25.length > 0,
results.vector.length > 0,
results.bfs.length > 0,
].filter(Boolean).length;
if (env.COHERE_API_KEY) {
logger.info("Using Cohere reranking");
return applyCohereReranking(query, results, options);
}
// If results are coming from only one source, use cross-encoder reranking
if (nonEmptySources <= 1) {
logger.info(
"Only one source has results, falling back to cross-encoder reranking",
);
return applyCrossEncoderReranking(query, results);
}
// Otherwise use combined MultiFactorReranking + MMR for multiple sources
return applyMultiFactorMMRReranking(results, {
lambda: 0.7, // Balance relevance (0.7) vs diversity (0.3)
maxResults: options.limit > 0 ? options.limit * 2 : 100, // Get more results for filtering
});
return applyLLMReranking(query, results,);
}
private async logRecallAsync(

View File

@ -442,6 +442,87 @@ export function applyMultiFactorReranking(results: {
return sortedResults;
}
/**
 * Apply LLM-based reranking for contextual understanding.
 *
 * Merges candidates from the three retrieval sources, deduplicates them,
 * then asks the model (via makeModelCall at 'high' complexity) to return
 * the indices of the truly relevant facts inside <output>...</output> tags.
 *
 * @param query - the user's search query
 * @param results - candidate statements from BM25, vector, and BFS search
 * @param limit - cap used only on the fallback paths (unparseable response
 *   or model error); the success path returns every selected fact
 * @returns statements the LLM judged relevant, or a truncated fallback
 */
export async function applyLLMReranking(
  query: string,
  results: {
    bm25: StatementNode[];
    vector: StatementNode[];
    bfs: StatementNode[];
  },
  limit: number = 10,
): Promise<StatementNode[]> {
  // Cap each source before merging to bound prompt size.
  const allResults = [
    ...results.bm25.slice(0, 100),
    ...results.vector.slice(0, 100),
    ...results.bfs.slice(0, 100),
  ];

  const uniqueResults = combineAndDeduplicateStatements(allResults);

  logger.info(`Unique results: ${uniqueResults.length}`);

  if (uniqueResults.length === 0) {
    logger.info("No results to rerank with LLM");
    return [];
  }

  const prompt = `You are a relevance filter. Given a user query and a list of facts, identify ONLY the facts that are truly relevant to answering the query.

Query: "${query}"

Facts:
${uniqueResults.map((r, i) => `${i}. ${r.fact}`).join('\n')}

Instructions:
- A fact is RELEVANT if it directly answers or provides information needed to answer the query
- A fact is NOT RELEVANT if it's tangentially related but doesn't answer the query
- Consider semantic meaning, not just keyword matching
- Only return facts with HIGH relevance (80% confidence)
- If you are not sure, return an empty array

Output format:
<output>[1, 5, 7]</output>

Return ONLY the numbers of highly relevant facts inside <output> tags as a JSON array:`;

  try {
    let responseText = "";
    await makeModelCall(
      false,
      [{ role: "user", content: prompt }],
      (text) => { responseText = text; },
      { temperature: 0 },
      'high'
    );

    // Extract the array from <output>[1, 5, 7]</output>
    const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
    if (outputMatch && outputMatch[1]) {
      responseText = outputMatch[1].trim();
      const parsedResponse = JSON.parse(responseText || "[]");
      const extractedIndices = Array.isArray(parsedResponse)
        ? parsedResponse
        : (parsedResponse.entities || []);

      // Guard against hallucinated or out-of-range indices: without this
      // filter, a bad index would map to `undefined` in the result array.
      const validIndices = extractedIndices.filter(
        (i: unknown): i is number =>
          typeof i === "number" &&
          Number.isInteger(i) &&
          i >= 0 &&
          i < uniqueResults.length,
      );

      if (validIndices.length === 0) {
        // The prompt instructs the model to return [] when unsure, so an
        // empty selection deliberately yields no results (not a fallback).
        logger.warn("LLM reranking returned no valid indices, returning no results");
        return [];
      }

      logger.info(`LLM reranking selected ${validIndices.length} relevant facts`);
      return validIndices.map((i) => uniqueResults[i]);
    }

    // No <output> tags found — fall back to the original order, capped.
    return uniqueResults.slice(0, limit);
  } catch (error) {
    logger.error("LLM reranking failed, falling back to original order:", { error });
    return uniqueResults.slice(0, limit);
  }
}
/**
* Apply Cohere Rerank 3.5 to search results for improved question-to-fact matching
* This is particularly effective for bridging the semantic gap between questions and factual statements
@ -456,6 +537,7 @@ export async function applyCohereReranking(
options?: {
limit?: number;
model?: string;
useLLMVerification?: boolean;
},
): Promise<StatementNode[]> {
const { model = "rerank-v3.5" } = options || {};
@ -491,10 +573,13 @@ export async function applyCohereReranking(
// Prepare documents for Cohere API
const documents = uniqueResults.map((statement) => statement.fact);
console.log("Documents:", documents);
logger.info(
`Cohere reranking ${documents.length} statements with model ${model}`,
);
logger.info(`Cohere query: "${query}"`);
logger.info(`First 5 documents: ${documents.slice(0, 5).join(' | ')}`);
// Call Cohere Rerank API
const response = await cohere.rerank({
@ -506,6 +591,11 @@ export async function applyCohereReranking(
console.log("Cohere reranking billed units:", response.meta?.billedUnits);
// Log top 5 Cohere results for debugging
logger.info(`Cohere top 5 results:\n${response.results.slice(0, 5).map((r, i) =>
` ${i + 1}. [${r.relevanceScore.toFixed(4)}] ${documents[r.index].substring(0, 80)}...`
).join('\n')}`);
// Map results back to StatementNodes with Cohere scores
const rerankedResults = response.results
.map((result, index) => ({
@ -513,7 +603,7 @@ export async function applyCohereReranking(
cohereScore: result.relevanceScore,
cohereRank: index + 1,
}))
.filter((result) => result.cohereScore >= Number(env.COHERE_SCORE_THRESHOLD));
// .filter((result) => result.cohereScore >= Number(env.COHERE_SCORE_THRESHOLD));
const responseTime = Date.now() - startTime;
logger.info(

View File

@ -3,6 +3,8 @@ import type { SearchOptions } from "../search.server";
import type { Embedding } from "ai";
import { logger } from "../logger.service";
import { runQuery } from "~/lib/neo4j.server";
import { getEmbedding } from "~/lib/model.server";
import { findSimilarEntities } from "../graphModels/entity";
/**
* Perform BM25 keyword-based search on statements
@ -129,25 +131,26 @@ export async function performVectorSearch(
`;
}
// 1. Search for similar statements using Neo4j vector search with provenance count
const limit = options.limit || 100;
// 1. Search for similar statements using GDS cosine similarity with provenance count
const cypher = `
CALL db.index.vector.queryNodes('statement_embedding', $topk, $embedding)
YIELD node AS s, score
MATCH (s:Statement)
WHERE s.userId = $userId
AND score >= 0.7
${timeframeCondition}
${spaceCondition}
WITH s, gds.similarity.cosine(s.factEmbedding, $embedding) AS score
WHERE score >= 0.5
OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s)
WITH s, score, count(episode) as provenanceCount
RETURN s, score, provenanceCount
ORDER BY score DESC
LIMIT ${limit}
`;
const params = {
embedding: query,
userId,
validAt: options.endTime.toISOString(),
topk: options.limit || 100,
...(options.startTime && { startTime: options.startTime.toISOString() }),
...(options.spaceIds.length > 0 && { spaceIds: options.spaceIds }),
};
@ -170,133 +173,223 @@ export async function performVectorSearch(
/**
* Perform BFS traversal starting from entities mentioned in the query
* Uses guided search with semantic filtering to reduce noise
*/
export async function performBfsSearch(
query: string,
embedding: Embedding,
userId: string,
options: Required<SearchOptions>,
): Promise<StatementNode[]> {
try {
// 1. Extract potential entities from query
const entities = await extractEntitiesFromQuery(embedding, userId);
// 1. Extract potential entities from query using chunked embeddings
const entities = await extractEntitiesFromQuery(query, userId);
// 2. For each entity, perform BFS traversal
const allStatements: StatementNode[] = [];
for (const entity of entities) {
const statements = await bfsTraversal(
entity.uuid,
options.maxBfsDepth,
options.endTime,
userId,
options.includeInvalidated,
options.startTime,
options.spaceIds,
);
allStatements.push(...statements);
if (entities.length === 0) {
return [];
}
return allStatements;
// 2. Perform guided BFS with semantic filtering
const statements = await bfsTraversal(
entities,
embedding,
options.maxBfsDepth || 3,
options.endTime,
userId,
options.includeInvalidated,
options.startTime,
);
// Return individual statements
return statements;
} catch (error) {
logger.error("BFS search error:", { error });
return [];
}
}
/**
* Perform BFS traversal starting from an entity
* Iterative BFS traversal - explores up to 3 hops level-by-level using Neo4j cosine similarity
*/
export async function bfsTraversal(
startEntityId: string,
async function bfsTraversal(
startEntities: EntityNode[],
queryEmbedding: Embedding,
maxDepth: number,
validAt: Date,
userId: string,
includeInvalidated: boolean,
startTime: Date | null,
spaceIds: string[] = [],
): Promise<StatementNode[]> {
try {
// Build the WHERE clause based on timeframe options
let timeframeCondition = `
AND s.validAt <= $validAt
${includeInvalidated ? '' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'}
`;
const RELEVANCE_THRESHOLD = 0.5;
const EXPLORATION_THRESHOLD = 0.3;
// If startTime is provided, add condition to filter by validAt >= startTime
if (startTime) {
timeframeCondition = `
AND s.validAt <= $validAt
${includeInvalidated ? '' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'}
AND s.validAt >= $startTime
`;
}
const allStatements = new Map<string, number>(); // uuid -> relevance
const visitedEntities = new Set<string>();
// Add space filtering if spaceIds are provided
let spaceCondition = "";
if (spaceIds.length > 0) {
spaceCondition = `
AND s.spaceIds IS NOT NULL AND ANY(spaceId IN $spaceIds WHERE spaceId IN s.spaceIds)
`;
}
// Track entities per level for iterative BFS
let currentLevelEntities = startEntities.map(e => e.uuid);
// Use Neo4j's built-in path finding capabilities for efficient BFS
// This query implements BFS up to maxDepth and collects all statements along the way
// Timeframe condition for temporal filtering
let timeframeCondition = `
AND s.validAt <= $validAt
${includeInvalidated ? '' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'}
`;
if (startTime) {
timeframeCondition += ` AND s.validAt >= $startTime`;
}
// Process each depth level
for (let depth = 0; depth < maxDepth; depth++) {
if (currentLevelEntities.length === 0) break;
// Mark entities as visited at this depth
currentLevelEntities.forEach(id => visitedEntities.add(`${id}`));
// Get statements for current level entities with cosine similarity calculated in Neo4j
const cypher = `
MATCH (e:Entity {uuid: $startEntityId})<-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]-(s:Statement)
WHERE
(s.userId = $userId)
${includeInvalidated ? 'AND s.validAt <= $validAt' : timeframeCondition}
${spaceCondition}
RETURN s as statement
MATCH (e:Entity{userId: $userId})-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]-(s:Statement{userId: $userId})
WHERE e.uuid IN $entityIds
${timeframeCondition}
WITH DISTINCT s // Deduplicate first
WITH s, gds.similarity.cosine(s.factEmbedding, $queryEmbedding) AS relevance
WHERE relevance >= $explorationThreshold
RETURN s.uuid AS uuid, relevance
ORDER BY relevance DESC
LIMIT 200 // Cap per BFS level to avoid explosion
`;
const params = {
startEntityId,
maxDepth,
validAt: validAt.toISOString(),
const records = await runQuery(cypher, {
entityIds: currentLevelEntities,
userId,
includeInvalidated,
queryEmbedding,
explorationThreshold: EXPLORATION_THRESHOLD,
validAt: validAt.toISOString(),
...(startTime && { startTime: startTime.toISOString() }),
...(spaceIds.length > 0 && { spaceIds }),
};
});
const records = await runQuery(cypher, params);
return records.map(
(record) => record.get("statement").properties as StatementNode,
);
} catch (error) {
logger.error("BFS traversal error:", { error });
// Store statement relevance scores
const currentLevelStatementUuids: string[] = [];
for (const record of records) {
const uuid = record.get("uuid");
const relevance = record.get("relevance");
if (!allStatements.has(uuid)) {
allStatements.set(uuid, relevance);
currentLevelStatementUuids.push(uuid);
}
}
// Get connected entities for next level
if (depth < maxDepth - 1 && currentLevelStatementUuids.length > 0) {
const nextCypher = `
MATCH (s:Statement{userId: $userId})-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]->(e:Entity{userId: $userId})
WHERE s.uuid IN $statementUuids
RETURN DISTINCT e.uuid AS entityId
`;
const nextRecords = await runQuery(nextCypher, {
statementUuids: currentLevelStatementUuids,
userId
});
// Filter out already visited entities
currentLevelEntities = nextRecords
.map(r => r.get("entityId"))
.filter(id => !visitedEntities.has(`${id}`));
} else {
currentLevelEntities = [];
}
}
// Filter by relevance threshold and fetch full statements
const relevantUuids = Array.from(allStatements.entries())
.filter(([_, relevance]) => relevance >= RELEVANCE_THRESHOLD)
.sort((a, b) => b[1] - a[1])
.map(([uuid]) => uuid);
if (relevantUuids.length === 0) {
return [];
}
const fetchCypher = `
MATCH (s:Statement{userId: $userId})
WHERE s.uuid IN $uuids
RETURN s
`;
const fetchRecords = await runQuery(fetchCypher, { uuids: relevantUuids, userId });
const statements = fetchRecords.map(r => r.get("s").properties as StatementNode);
logger.info(
`BFS: explored ${allStatements.size} statements across ${maxDepth} hops, returning ${statements.length} (≥${RELEVANCE_THRESHOLD})`
);
return statements;
}
/**
 * Generate query chunks (individual words and bigrams) for entity extraction.
 *
 * Produces, in order: each individual word (for single-word entities like
 * "user"), each adjacent-word bigram (for multi-word entities like
 * "home address"), and finally the full normalized query. Duplicates are
 * removed — e.g. a one-word query would otherwise appear both as a word
 * and as the full query — so the same chunk is never embedded twice.
 *
 * @param query - raw user query
 * @returns unique lowercase chunks, in first-occurrence order
 */
function generateQueryChunks(query: string): string[] {
  const normalized = query.toLowerCase().trim();
  const words = normalized.split(/\s+/).filter((word) => word.length > 0);

  // Individual words first (for entities like "user")
  const chunks: string[] = [...words];

  // Bigrams next (for multi-word entities like "home address")
  for (let i = 0; i < words.length - 1; i++) {
    chunks.push(`${words[i]} ${words[i + 1]}`);
  }

  // Full query as the final, broadest chunk
  chunks.push(normalized);

  // Dedupe while preserving first-occurrence order (Set iterates in
  // insertion order), avoiding redundant embedding calls downstream.
  return [...new Set(chunks)];
}
/**
* Extract potential entities from a query using embeddings or LLM
* Extract potential entities from a query using chunked embeddings
* Chunks query into words/bigrams, embeds each chunk, finds entities for each
*/
export async function extractEntitiesFromQuery(
embedding: Embedding,
query: string,
userId: string,
): Promise<EntityNode[]> {
try {
// Use vector similarity to find relevant entities
const cypher = `
// Match entities using vector index on name embeddings
CALL db.index.vector.queryNodes('entity_embedding', 3, $embedding)
YIELD node AS e, score
WHERE e.userId = $userId
AND score > 0.7
RETURN e
ORDER BY score DESC
`;
// Generate chunks from query
const chunks = generateQueryChunks(query);
const params = {
embedding,
userId,
};
// Get embeddings for each chunk
const chunkEmbeddings = await Promise.all(
chunks.map(chunk => getEmbedding(chunk))
);
const records = await runQuery(cypher, params);
// Search for entities matching each chunk embedding
const allEntitySets = await Promise.all(
chunkEmbeddings.map(async (embedding) => {
return await findSimilarEntities({
queryEmbedding: embedding,
limit: 3,
threshold: 0.7,
userId,
});
})
);
return records.map((record) => record.get("e").properties as EntityNode);
// Flatten and deduplicate entities by ID
const allEntities = allEntitySets.flat();
const uniqueEntities = Array.from(
new Map(allEntities.map(e => [e.uuid, e])).values()
);
return uniqueEntities;
} catch (error) {
logger.error("Entity extraction error:", { error });
return [];