Fix: Semantic Search issue (#89)

* Fix: normalization prompt

* Fix: improve knowledge graph construction and recall

* fix: add user context to search reranking

* fix: log the source in search

* fix: remove hardcoded limit

---------

Co-authored-by: Harshith Mullapudi <harshithmullapudi@gmail.com>
Manoj 2025-10-06 14:06:52 +05:30 committed by GitHub
parent 3d1b93d97d
commit 27f8740691
17 changed files with 580 additions and 734 deletions

View File

@ -83,8 +83,9 @@ export async function makeModelCall(
const generateTextOptions: any = {}
console.log('complexity:', complexity, 'model:', model)
logger.info(
`complexity: ${complexity}, model: ${model}`,
);
switch (model) {
case "gpt-4.1-2025-04-14":
case "gpt-4.1-mini-2025-04-14":

View File

@ -1,315 +0,0 @@
import { json } from "@remix-run/node";
import { randomUUID } from "node:crypto";
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
import { z } from "zod";
import { createHybridActionApiRoute } from "~/services/routeBuilders/apiBuilder.server";
import { addToQueue } from "~/lib/ingest.server";
import { SearchService } from "~/services/search.server";
import { handleTransport } from "~/utils/mcp";
import { SpaceService } from "~/services/space.server";
import { EpisodeTypeEnum } from "@core/types";
// Map to store transports by session ID with cleanup tracking
const transports: {
[sessionId: string]: {
transport: StreamableHTTPServerTransport;
createdAt: number;
};
} = {};
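// createdAt is recorded with each transport so stale sessions can be swept by
// age; explicit cleanup also happens via transport.onclose and the DELETE handler below.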
// MCP request body schema
const MCPRequestSchema = z.object({}).passthrough();
const SourceParams = z.object({
source: z.string().optional(),
});
// Search parameters schema for MCP tool
const SearchParamsSchema = z.object({
query: z.string().describe("The search query in third person perspective"),
validAt: z.string().optional().describe("The valid at time in ISO format"),
startTime: z.string().optional().describe("The start time in ISO format"),
endTime: z.string().optional().describe("The end time in ISO format"),
spaceIds: z
.array(z.string())
.optional()
.describe("Array of strings representing UUIDs of spaces"),
});
const IngestSchema = z.object({
message: z.string().describe("The data to ingest in text format"),
});
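// Hypothetical payloads that satisfy these schemas (all values illustrative):
// search arguments:
//   { "query": "John's home address", "validAt": "2025-10-03T00:00:00Z", "spaceIds": ["c0ffee00-0000-4000-8000-000000000001"] }
// ingest arguments:
//   { "message": "John moved to 123 Main St, Boston" }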
const searchService = new SearchService();
const spaceService = new SpaceService();
// Handle MCP HTTP requests properly
const handleMCPRequest = async (
request: Request,
body: any,
authentication: any,
params: z.infer<typeof SourceParams>,
) => {
const sessionId = request.headers.get("mcp-session-id") as string | undefined;
const source =
(request.headers.get("source") as string | undefined) ??
(params.source as string | undefined);
if (!source) {
return json(
{
jsonrpc: "2.0",
error: {
code: -32601,
message: "No source found",
},
id: null,
},
{ status: 400 },
);
}
let transport: StreamableHTTPServerTransport;
try {
if (sessionId && transports[sessionId]) {
// Reuse existing transport
transport = transports[sessionId].transport;
} else if (!sessionId && isInitializeRequest(body)) {
// New initialization request
transport = new StreamableHTTPServerTransport({
sessionIdGenerator: () => randomUUID(),
onsessioninitialized: (sessionId) => {
// Store the transport by session ID with timestamp
transports[sessionId] = {
transport,
createdAt: Date.now(),
};
},
});
// Clean up transport when closed
transport.onclose = () => {
if (transport.sessionId) {
delete transports[transport.sessionId];
}
};
const server = new McpServer(
{
name: "echo-memory-server",
version: "1.0.0",
},
{
capabilities: {
tools: {},
},
},
);
// Register ingest tool
server.registerTool(
"ingest",
{
title: "Ingest Data",
description: "Ingest data into the memory system",
inputSchema: IngestSchema.shape,
},
async (args) => {
try {
const userId = authentication.userId;
const response = addToQueue(
{
episodeBody: args.message,
referenceTime: new Date().toISOString(),
source,
type: EpisodeTypeEnum.CONVERSATION,
},
userId,
);
return {
content: [
{
type: "text",
text: JSON.stringify(response),
},
],
};
} catch (error) {
console.error("MCP ingest error:", error);
return {
content: [
{
type: "text",
text: `Error ingesting data: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
},
);
// Register search tool
server.registerTool(
"search",
{
title: "Search Data",
description: "Search through ingested data",
inputSchema: SearchParamsSchema.shape,
},
async (args) => {
try {
const userId = authentication.userId;
const results = await searchService.search(args.query, userId, {
startTime: args.startTime ? new Date(args.startTime) : undefined,
endTime: args.endTime ? new Date(args.endTime) : undefined,
});
return {
content: [
{
type: "text",
text: JSON.stringify(results),
},
],
};
} catch (error) {
console.error("MCP search error:", error);
return {
content: [
{
type: "text",
text: `Error searching: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
},
);
// Register search tool
server.registerTool(
"get_spaces",
{
title: "Get spaces",
description: "Get spaces in memory",
},
async () => {
try {
const userId = authentication.userId;
const spaces = await spaceService.getUserSpaces(userId);
return {
content: [
{
type: "text",
text: JSON.stringify(spaces),
},
],
isError: false,
};
} catch (error) {
console.error("Spaces error:", error);
return {
content: [
{
type: "text",
text: `Error getting spaces`,
},
],
isError: true,
};
}
},
);
// Connect to the MCP server
await server.connect(transport);
} else {
// Invalid request
throw new Error("Bad Request: No valid session ID provided");
}
const response = await handleTransport(transport, request, body);
return response;
} catch (error) {
console.error("MCP request error:", error);
return json(
{
jsonrpc: "2.0",
error: {
code: -32000,
message:
error instanceof Error ? error.message : "Internal server error",
},
id: body?.id || null,
},
{ status: 500 },
);
}
};
// Handle DELETE requests for session cleanup
const handleDelete = async (request: Request, authentication: any) => {
const sessionId = request.headers.get("mcp-session-id") as string | undefined;
if (!sessionId || !transports[sessionId]) {
return new Response("Invalid or missing session ID", { status: 400 });
}
const transport = transports[sessionId].transport;
// Clean up transport
transport.close();
delete transports[sessionId];
return new Response(null, { status: 204 });
};
const { action, loader } = createHybridActionApiRoute(
{
body: MCPRequestSchema,
searchParams: SourceParams,
allowJWT: true,
authorization: {
action: "mcp",
},
corsStrategy: "all",
},
async ({ body, authentication, request, searchParams }) => {
const method = request.method;
if (method === "POST") {
return await handleMCPRequest(
request,
body,
authentication,
searchParams,
);
} else if (method === "DELETE") {
return await handleDelete(request, authentication);
} else {
return json(
{
jsonrpc: "2.0",
error: {
code: -32601,
message: "Method not allowed",
},
id: null,
},
{ status: 405 },
);
}
},
);
export { action, loader };
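For reference, the session lifecycle this now-removed route implemented, seen from the client side; a minimal fetch-based sketch in which the endpoint path and the source value are assumptions:
// 1. Initialize: no mcp-session-id header yet; the server mints one.
const init = await fetch("/mcp?source=docs", {
  method: "POST",
  headers: { "content-type": "application/json" },
  body: JSON.stringify({
    jsonrpc: "2.0",
    id: 1,
    method: "initialize",
    params: {
      protocolVersion: "2024-11-05",
      capabilities: {},
      clientInfo: { name: "example-client", version: "0.0.0" },
    },
  }),
});
const sessionId = init.headers.get("mcp-session-id");
// 2. Tool calls reuse the stored transport via the session header.
await fetch("/mcp?source=docs", {
  method: "POST",
  headers: {
    "content-type": "application/json",
    "mcp-session-id": sessionId ?? "",
  },
  body: JSON.stringify({
    jsonrpc: "2.0",
    id: 2,
    method: "tools/call",
    params: { name: "search", arguments: { query: "John's home address" } },
  }),
});
// 3. DELETE closes the transport and frees the session entry.
await fetch("/mcp", {
  method: "DELETE",
  headers: { "mcp-session-id": sessionId ?? "" },
});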

View File

@ -278,7 +278,7 @@ export default function BillingSettings() {
</div>
<div className="space-y-2">
<div className="flex justify-between text-sm">
<span className="text-muted-foreground">Episodes</span>
<span className="text-muted-foreground">Facts</span>
<span className="font-medium">
{usageSummary.usage.episodes}
</span>

View File

@ -76,16 +76,19 @@ export async function findSimilarEntities(params: {
threshold: number;
userId: string;
}): Promise<EntityNode[]> {
const limit = params.limit || 5;
const query = `
CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
YIELD node AS entity, score
CALL db.index.vector.queryNodes('entity_embedding', ${limit*2}, $queryEmbedding)
YIELD node AS entity
WHERE entity.userId = $userId
WITH entity, gds.similarity.cosine(entity.nameEmbedding, $queryEmbedding) AS score
WHERE score >= $threshold
AND entity.userId = $userId
RETURN entity, score
ORDER BY score DESC
LIMIT ${limit}
`;
const result = await runQuery(query, { ...params, topK: params.limit });
const result = await runQuery(query, { ...params });
return result.map((record) => {
const entity = record.get("entity").properties;
@ -110,17 +113,20 @@ export async function findSimilarEntitiesWithSameType(params: {
threshold: number;
userId: string;
}): Promise<EntityNode[]> {
const limit = params.limit || 5;
const query = `
CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
YIELD node AS entity, score
WHERE score >= $threshold
AND entity.userId = $userId
CALL db.index.vector.queryNodes('entity_embedding', ${limit*2}, $queryEmbedding)
YIELD node AS entity
WHERE entity.userId = $userId
AND entity.type = $entityType
WITH entity, gds.similarity.cosine(entity.nameEmbedding, $queryEmbedding) AS score
WHERE score >= $threshold
RETURN entity, score
ORDER BY score DESC
LIMIT ${limit}
`;
const result = await runQuery(query, { ...params, topK: params.limit });
const result = await runQuery(query, { ...params });
return result.map((record) => {
const entity = record.get("entity").properties;

View File

@ -138,19 +138,21 @@ export async function searchEpisodesByEmbedding(params: {
limit?: number;
minSimilarity?: number;
}) {
const limit = params.limit || 100;
const query = `
CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
YIELD node AS episode, score
CALL db.index.vector.queryNodes('episode_embedding', ${limit*2}, $embedding)
YIELD node AS episode
WHERE episode.userId = $userId
AND score >= $minSimilarity
WITH episode, gds.similarity.cosine(episode.contentEmbedding, $embedding) AS score
WHERE score >= $minSimilarity
RETURN episode, score
ORDER BY score DESC`;
ORDER BY score DESC
LIMIT ${limit}`;
const result = await runQuery(query, {
embedding: params.embedding,
minSimilarity: params.minSimilarity,
userId: params.userId,
topK: 100,
});
if (!result || result.length === 0) {
@ -281,20 +283,22 @@ export async function getRelatedEpisodesEntities(params: {
limit?: number;
minSimilarity?: number;
}) {
const limit = params.limit || 100;
const query = `
CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
YIELD node AS episode, score
CALL db.index.vector.queryNodes('episode_embedding', ${limit*2}, $embedding)
YIELD node AS episode
WHERE episode.userId = $userId
AND score >= $minSimilarity
WITH episode, gds.similarity.cosine(episode.contentEmbedding, $embedding) AS score
WHERE score >= $minSimilarity
OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity)
WHERE entity IS NOT NULL
RETURN DISTINCT entity`;
RETURN DISTINCT entity
LIMIT ${limit}`;
const result = await runQuery(query, {
embedding: params.embedding,
minSimilarity: params.minSimilarity,
userId: params.userId,
topK: params.limit || 100,
});
return result

View File

@ -211,15 +211,18 @@ export async function findSimilarStatements({
excludeIds?: string[];
userId: string;
}): Promise<Omit<StatementNode, "factEmbedding">[]> {
const limit = 100;
const query = `
CALL db.index.vector.queryNodes('statement_embedding', $topK, $factEmbedding)
YIELD node AS statement, score
CALL db.index.vector.queryNodes('statement_embedding', ${limit*2}, $factEmbedding)
YIELD node AS statement
WHERE statement.userId = $userId
AND statement.invalidAt IS NULL
AND score >= $threshold
${excludeIds.length > 0 ? "AND NOT statement.uuid IN $excludeIds" : ""}
WITH statement, gds.similarity.cosine(statement.factEmbedding, $factEmbedding) AS score
WHERE score >= $threshold
RETURN statement, score
ORDER BY score DESC
LIMIT ${limit}
`;
const result = await runQuery(query, {
@ -227,7 +230,6 @@ export async function findSimilarStatements({
threshold,
excludeIds,
userId,
topK: 100,
});
if (!result || result.length === 0) {
@ -410,14 +412,17 @@ export async function searchStatementsByEmbedding(params: {
limit?: number;
minSimilarity?: number;
}) {
const limit = params.limit || 100;
const query = `
CALL db.index.vector.queryNodes('statement_embedding', $topK, $embedding)
YIELD node AS statement, score
CALL db.index.vector.queryNodes('statement_embedding', ${limit*2}, $embedding)
YIELD node AS statement
WHERE statement.userId = $userId
AND statement.invalidAt IS NULL
AND score >= $minSimilarity
WITH statement, gds.similarity.cosine(statement.factEmbedding, $embedding) AS score
WHERE score >= $minSimilarity
RETURN statement, score
ORDER BY score DESC
LIMIT ${limit}
`;
const result = await runQuery(query, {
@ -425,7 +430,6 @@ export async function searchStatementsByEmbedding(params: {
minSimilarity: params.minSimilarity,
limit: params.limit,
userId: params.userId,
topK: params.limit || 100,
});
if (!result || result.length === 0) {

View File

@ -78,7 +78,24 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- Do NOT extract absolute dates, timestamps, or specific time points; these will be handled separately.
- Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm").
8. **Entity Name Extraction**:
8. **Entity Usefulness Test - SELECTIVITY FILTER**:
Before extracting an entity, ask: "Would this be useful in a search query?"
EXTRACT (Searchable, persistent concepts):
- Named entities: "Sarah", "OpenAI", "Boston", "Albert Heijn"
- Domain concepts: "Preferences", "Home Address", "Annual Review", "Coding Practice"
- Measurements: "10/10 rating", "$2.5 million", "75% completion"
- Abstract concepts: "Lean Startup", "DevOps Culture", "Quality Standards"
SKIP (Transient descriptors, low search value):
- Descriptive phrases: "new files", "existing code", "good practice", "necessary changes"
- Generic qualifiers: "better approach", "current version", "recent updates"
- Verb phrases: "creating documentation", "editing files", "avoiding mistakes"
- Adjective+noun combinations without specificity: "important meeting", "quick fix"
**GUIDELINE**: Extract stable concepts that persist across contexts. Skip ephemeral descriptors tied to single actions.
9. **Entity Name Extraction**:
- Extract ONLY the core entity name, WITHOUT any descriptors or qualifiers
- When text mentions "Tesla car", extract TWO entities: "Tesla" AND "Car"
- When text mentions "memory space system", extract "Memory", "Space", AND "System" as separate entities
@ -87,7 +104,7 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
- **CONCEPT NORMALIZATION**: Convert to singular form where appropriate ("spaces" → "Space")
9. **Temporal and Relationship Context Extraction**:
10. **Temporal and Relationship Context Extraction**:
- EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years")
- EXTRACT temporal context that anchors relationships ("since moving", "after graduation", "during college")
- EXTRACT relationship qualifiers ("close friends", "support system", "work team", "family members")

View File

@ -5,16 +5,43 @@ export const normalizePrompt = (
): CoreMessage[] => {
const sysPrompt = `You are C.O.R.E. (Contextual Observation & Recall Engine), a smart memory enrichment system.
Create ONE enriched sentence that transforms the episode into a contextually-rich memory using SELECTIVE enrichment.
Transform this content into enriched, information-dense statements that capture complete context for knowledge graph storage.
CRITICAL: CAPTURE ALL DISTINCT PIECES OF INFORMATION from the episode. Every separate fact, preference, request, clarification, or detail mentioned must be preserved in your enriched output. Missing information is unacceptable.
CRITICAL: CAPTURE ALL DISTINCT PIECES OF INFORMATION. Every separate fact, preference, request, clarification, specification, or detail mentioned must be preserved in your enriched output. Missing information is unacceptable.
OUTPUT GUIDELINES:
- Simple content (1-2 facts): Use 1-2 concise sentences
- Complex content (multiple facts/categories): Use multiple focused paragraphs, each covering ONE topic area
- Technical content: Preserve specifications, commands, paths, version numbers, configurations
- Let content complexity determine output length - completeness over arbitrary brevity
- IMPORTANT: Break complex content into digestible paragraphs with natural sentence boundaries for easier fact extraction
<enrichment_strategy>
1. PRIMARY FACTS - Always preserve the core information from the episode
2. TEMPORAL RESOLUTION - Convert relative dates to absolute dates using episode timestamp
3. CONTEXT ENRICHMENT - Add context ONLY when it clarifies unclear references
4. VISUAL CONTENT - Capture exact text on signs, objects shown, specific details from images
5. EMOTIONAL PRESERVATION - Maintain the tone and feeling of emotional exchanges
1. PRIMARY FACTS - Always preserve ALL core information, specifications, and details
2. SPEAKER ATTRIBUTION - When content contains self-introductions ("I'm X", "My name is Y"), explicitly preserve speaker identity in third person (e.g., "the user introduced themselves as X" or "X introduced himself/herself")
3. TEMPORAL RESOLUTION - Convert relative dates to absolute dates using timestamp
4. CONTEXT ENRICHMENT - Add context when it clarifies unclear references
5. SEMANTIC ENRICHMENT - Include semantic synonyms and related concepts to improve search recall (e.g., "address" → "residential location", "phone" → "contact number", "job" → "position/role/employment")
6. ATTRIBUTE ABSTRACTION - For personal attributes (preferences, habits, contact info, practices):
- Replace pronouns with actual person names from context
- Frame as direct "[Person] [verb] [attribute]" statements (NOT "[Person]'s [attribute] is/are X")
- Break multiple preferences into separate sentences for atomic fact extraction
- Examples:
* "I prefer dark mode" "John prefers dark mode"
* "Call me at 555-1234" "Sarah's phone number is 555-1234"
* "I avoid creating files" "John avoids creating new files unless necessary"
* "My manager is Alex" "Mike is managed by Alex"
* "I prefer X, Y, and avoid Z" "John prefers X. John prefers Y. John avoids Z."
7. VISUAL CONTENT - Capture exact text on signs, objects shown, specific details from images
8. EMOTIONAL PRESERVATION - Maintain tone and feeling of emotional exchanges
9. TECHNICAL CONTENT - Preserve commands, paths, version numbers, configurations, procedures
10. STRUCTURED CONTENT - Maintain hierarchy, lists, categories, relationships
CONTENT-ADAPTIVE APPROACH:
- Conversations: Focus on dialogue context, relationships, emotional tone
- Documents: Extract structured facts, technical details, categorical organization
- Code/Technical: Preserve functionality, dependencies, configurations, architectural decisions
- Structured Data: Maintain categories, hierarchies, specifications
When to add context from related memories:
- Unclear pronouns ("she", "it", "they") → resolve to specific entity
@ -24,7 +51,7 @@ When to add context from related memories:
When NOT to add context:
- Clear, self-contained statements → no enrichment needed beyond temporal
- Emotional responses → preserve tone, avoid over-contextualization
- Already established topics → don't repeat details mentioned earlier in conversation
- Already established topics → don't repeat details mentioned earlier in same session
</enrichment_strategy>
<temporal_resolution>
@ -141,64 +168,76 @@ EMPTY ENCOURAGEMENT EXAMPLES (DON'T STORE these):
</quality_control>
<enrichment_examples>
HIGH VALUE enrichment:
- Original: "She said yes!"
SIMPLE CONVERSATION - HIGH VALUE ENRICHMENT:
- Original: "She said yes!"
- Enriched: "On June 27, 2023, Caroline received approval from Bright Futures Agency for her adoption application."
- Why: Resolves unclear pronoun, adds temporal context, identifies the approving entity
MINIMAL enrichment (emotional support):
SIMPLE CONVERSATION - EMOTIONAL SUPPORT:
- Original: "You'll be an awesome mom! Good luck!"
- Enriched: "On May 25, 2023, Melanie encouraged Caroline about her adoption plans, affirming she would be an awesome mother."
- Why: Simple temporal context, preserve emotional tone, no historical dumping
ANTI-BLOAT example (what NOT to do):
- Wrong: "On May 25, 2023, Melanie praised Caroline for her commitment to creating a family for children in need through adoption—supported by the inclusive Adoption Agency whose brochure and signs reading 'new arrival' and 'information and domestic building' Caroline had shared earlier that day—and encouraged her by affirming she would be an awesome mom."
- Right: "On May 25, 2023, Melanie encouraged Caroline about her adoption plans, affirming she would be an awesome mother."
SEMANTIC ENRICHMENT FOR BETTER SEARCH:
- Original: "My address is 123 Main St. Boston, MA 02101"
- Enriched: "On October 3, 2025, the user's residential address (home location) is 123 Main St. Boston, MA 02101."
- Why: "residential address" and "home location" as synonyms improve semantic search for queries like "where does user live" or "residential location"
CLEAR REFERENCE (minimal enrichment):
- Original: "Thanks, Caroline! The event was really thought-provoking."
- Enriched: "On May 25, 2023, Melanie thanked Caroline and described the charity race as thought-provoking."
- Why: Clear context doesn't need repetitive anchoring
- Original: "Call me at 555-1234"
- Enriched: "On October 3, 2025, the user's phone number (contact number) is 555-1234."
- Why: "phone number" and "contact number" as synonyms help queries like "how to contact" or "telephone"
CONVERSATION FLOW EXAMPLES:
WRONG (context fatigue): "reinforcing their ongoing conversation about mental health following Melanie's participation in the recent charity race for mental health"
RIGHT (minimal reference): "reinforcing their conversation about mental health"
ATTRIBUTE ABSTRACTION FOR BETTER GRAPH RELATIONSHIPS:
- Original: "I avoid creating new files unless necessary"
- Enriched: "On October 3, 2025, John has a coding practice: avoid creating new files unless necessary."
- Why: Creates direct relationship from person to practice for better graph traversal
WRONG (compound enrichment): "as she begins the process of turning her dream of giving children a loving home into reality and considers specific adoption agencies"
RIGHT (focused): "as she begins pursuing her adoption plans"
- Original: "I prefer editing existing code over writing new code"
- Enriched: "On October 3, 2025, John prefers editing existing code over writing new code."
- Why: Direct preference relationship enables queries like "what are John's preferences"
WRONG (over-contextualization): "following her participation in the May 20, 2023 charity race for mental health awareness"
RIGHT (after first mention): "following the recent charity race"
- Original: "My manager is Sarah"
- Enriched: "On October 3, 2025, Alex is managed by Sarah."
- Why: Direct reporting relationship instead of intermediate "manager" entity
GENERIC IDENTITY PRESERVATION EXAMPLES:
- Original: "my hometown, Boston" Enriched: "Boston, [person]'s hometown"
- Original: "my workplace, Google" Enriched: "Google, [person]'s workplace"
- Original: "my sister, Sarah" Enriched: "Sarah, [person]'s sister"
- Original: "from my university, MIT" Enriched: "from MIT, [person]'s university"
COMPLEX TECHNICAL CONTENT - COMPREHENSIVE EXTRACTION:
- Original: "Working on e-commerce site with Next.js 14. Run pnpm dev to start at port 3000. Using Prisma with PostgreSQL, Stripe for payments, Redis for caching. API routes in /api/*, database migrations in /prisma/migrations."
- Enriched: "On January 15, 2024, the user is developing an e-commerce site built with Next.js 14. Development setup: pnpm dev starts local server on port 3000. Technology stack: Prisma ORM with PostgreSQL database, Stripe integration for payment processing, Redis for caching. Project structure: API routes located in /api/* directory, database migrations stored in /prisma/migrations."
- Why: Preserves ALL technical details, commands, ports, technologies, file paths, dependencies in organized readable format
POSSESSIVE + APPOSITIVE PATTERNS (Critical for Relations):
- Original: "my colleague at my office, Microsoft"
- Enriched: "his colleague at Microsoft, David's workplace"
- Why: Preserves both the work relationship AND the employment identity
STRUCTURED PREFERENCES:
- Original: "I prefer minimalist design, dark mode by default, keyboard shortcuts for navigation, and hate pop-up notifications"
- Enriched: "On March 10, 2024, the user documented their UI/UX preferences: prefers minimalist design aesthetic, dark mode as default theme, keyboard shortcuts for primary navigation, and dislikes pop-up notifications."
- Why: Maintains all distinct preferences as clear, searchable facts
- Original: "my friend from my university, Stanford"
- Enriched: "her friend from Stanford, Lisa's alma mater"
- Why: Establishes both the friendship and educational institution identity
SELF-INTRODUCTION - SPEAKER ATTRIBUTION:
- Original: "I'm John. I'm a Developer. My primary goal with CORE is to build a personal memory system."
- Enriched: "On October 2, 2025, the user introduced themselves as John, a Developer. John's primary goal with CORE is to build a personal memory system."
- Why: Explicitly preserves speaker identity and self-introduction context for proper attribution
- Original: "my neighbor in my city, Chicago"
- Enriched: "his neighbor in Chicago, Mark's hometown"
- Why: Maintains both the neighbor relationship and residence identity
- Original: "Hi, my name is Sarah and I work at Meta as a product manager"
- Enriched: "On January 20, 2024, the user introduced themselves as Sarah, a product manager at Meta."
- Why: Captures self-identification with name, role, and organization attribution
WRONG (loses relationships): reduces to just entity names without preserving the defining relationship
RIGHT (preserves identity): maintains the possessive/definitional connection that establishes entity relationships
ANTI-BLOAT (what NOT to do):
WRONG: "On May 25, 2023, Melanie praised Caroline for her commitment to creating a family for children in need through adoption—supported by the inclusive Adoption Agency whose brochure and signs reading 'new arrival' and 'information and domestic building' Caroline had shared earlier that day—and encouraged her by affirming she would be an awesome mom."
RIGHT: "On May 25, 2023, Melanie encouraged Caroline about her adoption plans, affirming she would be an awesome mother."
WRONG (run-on mega-sentence): Cramming 10+ facts into single 200+ word sentence with no structure
RIGHT (organized): Multiple clear sentences or structured paragraphs with natural boundaries
IDENTITY PRESERVATION:
- Original: "my hometown, Boston" "Boston, [person]'s hometown"
- Original: "my colleague at Microsoft" "colleague at Microsoft, [person]'s workplace"
- Why: Maintains possessive/definitional connections establishing entity relationships
</enrichment_examples>
CRITICAL OUTPUT FORMAT REQUIREMENT:
You MUST wrap your response in <output> tags. This is MANDATORY - no exceptions.
If the episode should be stored in memory:
If the content should be stored in memory:
<output>
{{your_enriched_sentence_here}}
{{your_enriched_output_here}}
</output>
If there is nothing worth remembering:
@ -209,10 +248,10 @@ NOTHING_TO_REMEMBER
FAILURE TO USE <output> TAGS WILL RESULT IN EMPTY NORMALIZATION AND SYSTEM FAILURE.
FORMAT EXAMPLES:
CORRECT: <output>On May 25, 2023, Caroline shared her adoption plans with Melanie.</output>
CORRECT (simple): <output>On May 25, 2023, Caroline shared her adoption plans with Melanie.</output>
CORRECT (technical): <output>On January 15, 2024, the user is developing an e-commerce site with Next.js 14. Development: pnpm dev on port 3000. Stack: Prisma with PostgreSQL, Stripe payments, Redis caching. Structure: API routes in /api/*, migrations in /prisma/migrations.</output>
CORRECT: <output>NOTHING_TO_REMEMBER</output>
WRONG: On May 25, 2023, Caroline shared her adoption plans with Melanie.
WRONG: NOTHING_TO_REMEMBER
WRONG: Missing <output> tags entirely
ALWAYS include opening <output> and closing </output> tags around your entire response.
`;

View File

@ -72,6 +72,53 @@ For each entity, systematically check these common patterns:
- Complex multi-hop inferences
- Implicit relationships requiring interpretation
## DIRECT RELATIONSHIP PRIORITY
ALWAYS create direct subject→predicate→object relationships. Avoid intermediate container entities that add unnecessary graph hops.
PREFERRED (1-hop traversal, optimal recall):
- "Sarah's manager is Mike" Sarah managed_by Mike
- "Alex prefers dark mode" Alex prefers "dark mode"
- "Office in Boston" Office located_in Boston
- "User avoids creating files" User avoids "creating new files"
- "Home address is 123 Main St" User has_home_address "123 Main St, Boston"
AVOID (2-hop traversal, poor recall):
- Sarah has Manager [then] Manager is Mike (adds extra hop)
- Alex has Preferences [then] Preferences includes "dark mode" (adds extra hop)
- Office has Location [then] Location is_in Boston (adds extra hop)
## ATOMIC BUT CONTEXTUAL FACTS
When extracting facts about preferences, practices, habits, or context-specific information, ALWAYS include the scope/context directly in the fact statement itself. This ensures atomic facts retain their contextual boundaries.
GOOD (Atomic + Contextual):
- "Sarah prefers morning workouts at the gym"
- "Family orders pizza for Friday movie nights"
- "Alex drinks green tea when working late"
- "Doctor recommends stretching exercises for back pain"
- "Team celebrates birthdays with lunch outings"
- "Maria reads fiction books during vacation"
BAD (Atomic but Decontextualized - loses scope):
- "Sarah prefers morning workouts" (where? at home? at gym? outdoors?)
- "Family orders pizza" (when? weekends? special occasions? always?)
- "Alex drinks green tea" (when? all day? specific times? why?)
- "Doctor recommends stretching" (for what? general health? specific condition?)
- "Team celebrates birthdays" (how? where? what tradition?)
- "Maria reads fiction books" (when? always? specific contexts?)
**Guideline**: If a preference, practice, habit, or recommendation applies to a specific context (time, place, situation, purpose, condition), embed that context in the natural language fact so the atomic statement preserves its boundaries.
**Intermediate Entity Exception**: Only create intermediate entities if they represent meaningful concepts with multiple distinct properties:
- "Employment Contract 2024" (has salary, duration, benefits, start_date, role, etc.)
- "Annual Performance Review" (has ratings, achievements, goals, feedback, etc.)
- "User Preferences" (just a container for preference values - use direct User prefers X)
- "Manager" (just points to a person - use direct Sarah managed_by Mike)
- "Home Address" (just holds an address - use direct User has_home_address "address")
**Guideline**: If the intermediate entity would have only 1-2 properties, make it a direct relationship instead.
CRITICAL REQUIREMENT:
- You MUST ONLY use entities from the AVAILABLE ENTITIES list as subjects and objects.
- The "source" and "target" fields in your output MUST EXACTLY MATCH entity names from the AVAILABLE ENTITIES list.
@ -102,15 +149,6 @@ Follow these instructions:
- predicate: The relationship type (can be a descriptive phrase)
- target: The object entity (MUST be from AVAILABLE ENTITIES)
## SAME-NAME ENTITY RELATIONSHIP FORMATION
When entities share identical names but have different types, CREATE explicit relationship statements:
- **Person-Organization**: "John (Person)" → "owns", "founded", "works for", or "leads" → "John (Company)"
- **Person-Location**: "Smith (Person)" → "lives in", "founded", or "is associated with" → "Smith (City)"
- **Event-Location**: "Conference (Event)" → "takes place at" or "is hosted by" → "Conference (Venue)"
- **Product-Company**: "Tesla (Product)" → "is manufactured by" or "is developed by" → "Tesla (Company)"
- **MANDATORY**: Always create at least one relationship statement for same-name entities
- **CONTEXT-DRIVEN**: Choose predicates that accurately reflect the most likely relationship based on available context
## DURATION AND TEMPORAL CONTEXT ENTITY USAGE
When Duration or TemporalContext entities are available in AVAILABLE ENTITIES:
- **Duration entities** (e.g., "4 years", "2 months") should be used as "duration" attributes in relationship statements
@ -307,6 +345,28 @@ Extract the basic semantic backbone that answers: WHO, WHAT, WHERE, WHEN, WHY, H
**Reference**: Document references Entity
**Employment**: Person works_for Organization
## ATOMIC BUT CONTEXTUAL FACTS
When extracting facts about preferences, practices, habits, or context-specific information, ALWAYS include the scope/context directly in the fact statement itself. This ensures atomic facts retain their contextual boundaries.
GOOD (Atomic + Contextual):
- "Sarah prefers morning workouts at the gym"
- "Family orders pizza for Friday movie nights"
- "Alex drinks green tea when working late"
- "Doctor recommends stretching exercises for back pain"
- "Team celebrates birthdays with lunch outings"
- "Maria reads fiction books during vacation"
BAD (Atomic but Decontextualized - loses scope):
- "Sarah prefers morning workouts" (where? at home? at gym? outdoors?)
- "Family orders pizza" (when? weekends? special occasions? always?)
- "Alex drinks green tea" (when? all day? specific times? why?)
- "Doctor recommends stretching" (for what? general health? specific condition?)
- "Team celebrates birthdays" (how? where? what tradition?)
- "Maria reads fiction books" (when? always? specific contexts?)
**Guideline**: If a preference, practice, habit, or recommendation applies to a specific context (time, place, situation, purpose, condition), embed that context in the natural language fact so the atomic statement preserves its boundaries.
## RELATIONSHIP QUALITY HIERARCHY
## RELATIONSHIP TEMPLATES (High Priority)

View File

@ -1,10 +1,6 @@
import type { EpisodicNode, StatementNode } from "@core/types";
import { logger } from "./logger.service";
import {
applyCohereReranking,
applyCrossEncoderReranking,
applyMultiFactorMMRReranking,
} from "./search/rerank";
import { applyLLMReranking } from "./search/rerank";
import {
getEpisodesByStatements,
performBfsSearch,
@ -14,7 +10,6 @@ import {
import { getEmbedding } from "~/lib/model.server";
import { prisma } from "~/db.server";
import { runQuery } from "~/lib/neo4j.server";
import { env } from "~/env.server";
/**
* SearchService provides methods to search the reified + temporal knowledge graph
@ -36,12 +31,21 @@ export class SearchService {
query: string,
userId: string,
options: SearchOptions = {},
): Promise<{ episodes: string[]; facts: { fact: string; validAt: Date; invalidAt: Date | null; relevantScore: number }[] }> {
source?: string,
): Promise<{
episodes: string[];
facts: {
fact: string;
validAt: Date;
invalidAt: Date | null;
relevantScore: number;
}[];
}> {
const startTime = Date.now();
// Default options
const opts: Required<SearchOptions> = {
limit: options.limit || 10,
limit: options.limit || 100,
maxBfsDepth: options.maxBfsDepth || 4,
validAt: options.validAt || new Date(),
startTime: options.startTime || null,
@ -61,7 +65,7 @@ export class SearchService {
const [bm25Results, vectorResults, bfsResults] = await Promise.all([
performBM25Search(query, userId, opts),
performVectorSearch(queryVector, userId, opts),
performBfsSearch(queryVector, userId, opts),
performBfsSearch(query, queryVector, userId, opts),
]);
logger.info(
@ -71,16 +75,18 @@ export class SearchService {
// 2. Apply reranking strategy
const rankedStatements = await this.rerankResults(
query,
userId,
{ bm25: bm25Results, vector: vectorResults, bfs: bfsResults },
opts,
);
// // 3. Apply adaptive filtering based on score threshold and minimum count
const filteredResults = this.applyAdaptiveFiltering(rankedStatements, opts);
// const filteredResults = rankedStatements;
// 3. Return top results
const episodes = await getEpisodesByStatements(filteredResults.map((item) => item.statement));
const episodes = await getEpisodesByStatements(
filteredResults.map((item) => item.statement),
);
// Log recall asynchronously (don't await to avoid blocking response)
const responseTime = Date.now() - startTime;
@ -90,11 +96,16 @@ export class SearchService {
filteredResults.map((item) => item.statement),
opts,
responseTime,
source,
).catch((error) => {
logger.error("Failed to log recall event:", error);
});
this.updateRecallCount(userId, episodes, filteredResults.map((item) => item.statement));
this.updateRecallCount(
userId,
episodes,
filteredResults.map((item) => item.statement),
);
return {
episodes: episodes.map((episode) => episode.originalContent),
@ -114,7 +125,7 @@ export class SearchService {
private applyAdaptiveFiltering(
results: StatementNode[],
options: Required<SearchOptions>,
): { statement: StatementNode, score: number }[] {
): { statement: StatementNode; score: number }[] {
if (results.length === 0) return [];
let isRRF = false;
@ -152,7 +163,11 @@ export class SearchService {
// If no scores are available, return the original results
if (!hasScores) {
logger.info("No scores found in results, skipping adaptive filtering");
return options.limit > 0 ? results.slice(0, options.limit).map((item) => ({ statement: item, score: 0 })) : results.map((item) => ({ statement: item, score: 0 }));
return options.limit > 0
? results
.slice(0, options.limit)
.map((item) => ({ statement: item, score: 0 }))
: results.map((item) => ({ statement: item, score: 0 }));
}
// Sort by score (descending)
@ -207,9 +222,9 @@ export class SearchService {
const limitedResults =
options.limit > 0
? filteredResults.slice(
0,
Math.min(filteredResults.length, options.limit),
)
0,
Math.min(filteredResults.length, options.limit),
)
: filteredResults;
logger.info(
@ -227,6 +242,7 @@ export class SearchService {
*/
private async rerankResults(
query: string,
userId: string,
results: {
bm25: StatementNode[];
vector: StatementNode[];
@ -234,31 +250,17 @@ export class SearchService {
},
options: Required<SearchOptions>,
): Promise<StatementNode[]> {
// Count non-empty result sources
const nonEmptySources = [
results.bm25.length > 0,
results.vector.length > 0,
results.bfs.length > 0,
].filter(Boolean).length;
if (env.COHERE_API_KEY) {
logger.info("Using Cohere reranking");
return applyCohereReranking(query, results, options);
}
// If results are coming from only one source, use cross-encoder reranking
if (nonEmptySources <= 1) {
logger.info(
"Only one source has results, falling back to cross-encoder reranking",
);
return applyCrossEncoderReranking(query, results);
}
// Otherwise use combined MultiFactorReranking + MMR for multiple sources
return applyMultiFactorMMRReranking(results, {
lambda: 0.7, // Balance relevance (0.7) vs diversity (0.3)
maxResults: options.limit > 0 ? options.limit * 2 : 100, // Get more results for filtering
// Fetch user profile for context
const user = await prisma.user.findUnique({
where: { id: userId },
select: { name: true, id: true },
});
const userContext = user
? { name: user.name ?? undefined, userId: user.id }
: undefined;
return applyLLMReranking(query, results, options.limit, userContext);
}
private async logRecallAsync(
@ -267,6 +269,7 @@ export class SearchService {
results: StatementNode[],
options: Required<SearchOptions>,
responseTime: number,
source?: string,
): Promise<void> {
try {
// Determine target type based on results
@ -317,7 +320,7 @@ export class SearchService {
startTime: options.startTime?.toISOString() || null,
endTime: options.endTime.toISOString(),
}),
source: "search_api",
source: source ?? "search_api",
responseTimeMs: responseTime,
userId,
},
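With these changes a call site can pass the request source through, and the default limit is larger; a minimal sketch using the names from this diff:
const searchService = new SearchService();
const { episodes, facts } = await searchService.search(
  "Where does John live?",
  userId,
  {}, // limit now defaults to 100 (previously 10)
  "mcp", // logged as the recall source instead of the hardcoded "search_api"
);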

View File

@ -442,6 +442,105 @@ export function applyMultiFactorReranking(results: {
return sortedResults;
}
/**
* Apply LLM-based reranking for contextual understanding
* Uses GPT-4o-mini to verify relevance with semantic reasoning
*/
export async function applyLLMReranking(
query: string,
results: {
bm25: StatementNode[];
vector: StatementNode[];
bfs: StatementNode[];
},
limit: number = 10,
userContext?: { name?: string; userId: string },
): Promise<StatementNode[]> {
const allResults = [
...results.bm25.slice(0, 100),
...results.vector.slice(0, 100),
...results.bfs.slice(0, 100),
];
const uniqueResults = combineAndDeduplicateStatements(allResults);
logger.info(`Unique results: ${uniqueResults.length}`);
if (uniqueResults.length === 0) {
logger.info("No results to rerank with Cohere");
return [];
}
// Build user context section if provided
const userContextSection = userContext?.name
? `\nUser Identity Context:
- The user's name is "${userContext.name}"
- References to "user", "${userContext.name}", or pronouns like "my/their" refer to the same person
- When matching queries about "user's X" or "${userContext.name}'s X", these are equivalent\n`
: "";
const prompt = `You are a relevance filter. Given a user query and a list of facts, identify ONLY the facts that are truly relevant to answering the query.
${userContextSection}
Query: "${query}"
Facts:
${uniqueResults.map((r, i) => `${i}. ${r.fact}`).join("\n")}
Instructions:
- A fact is RELEVANT if it directly answers or provides information needed to answer the query
- A fact is NOT RELEVANT if it's tangentially related but doesn't answer the query
- Consider semantic meaning, not just keyword matching
${userContext?.name ? `- Remember: "user", "${userContext.name}", and possessive references ("my", "their") all refer to the same person` : ""}
- Only return facts with HIGH relevance (≥80% confidence)
- If you are not sure, return an empty array
Output format:
<output>[1, 5, 7]</output>
Return ONLY the numbers of highly relevant facts inside <output> tags as a JSON array:`;
try {
let responseText = "";
await makeModelCall(
false,
[{ role: "user", content: prompt }],
(text) => {
responseText = text;
},
{ temperature: 0 },
"high",
);
// Extract array from <output>[1, 5, 7]</output>
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
if (outputMatch && outputMatch[1]) {
responseText = outputMatch[1].trim();
const parsedResponse = JSON.parse(responseText || "[]");
const extractedIndices = Array.isArray(parsedResponse)
? parsedResponse
: parsedResponse.entities || [];
if (extractedIndices.length === 0) {
logger.warn(
"LLM reranking returned no valid indices, falling back to original order",
);
return [];
}
logger.info(
`LLM reranking selected ${extractedIndices.length} relevant facts`,
);
const selected = extractedIndices
        .map((i: number) => uniqueResults[i])
        .filter((s: StatementNode | undefined): s is StatementNode => Boolean(s)); // guard against out-of-range indices from the LLM
return selected;
}
return uniqueResults.slice(0, limit);
} catch (error) {
logger.error("LLM reranking failed, falling back to original order:", {
error,
});
return uniqueResults.slice(0, limit);
}
}
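A direct invocation of the new reranker, with illustrative arguments matching the signature above:
const ranked = await applyLLMReranking(
  "what is the user's phone number",
  { bm25: bm25Statements, vector: vectorStatements, bfs: [] },
  10,
  { name: "Sarah", userId: "user_123" },
);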
/**
* Apply Cohere Rerank 3.5 to search results for improved question-to-fact matching
* This is particularly effective for bridging the semantic gap between questions and factual statements
@ -456,6 +555,7 @@ export async function applyCohereReranking(
options?: {
limit?: number;
model?: string;
useLLMVerification?: boolean;
},
): Promise<StatementNode[]> {
const { model = "rerank-v3.5" } = options || {};
@ -491,10 +591,13 @@ export async function applyCohereReranking(
// Prepare documents for Cohere API
const documents = uniqueResults.map((statement) => statement.fact);
console.log("Documents:", documents);
logger.info(
`Cohere reranking ${documents.length} statements with model ${model}`,
);
logger.info(`Cohere query: "${query}"`);
logger.info(`First 5 documents: ${documents.slice(0, 5).join(" | ")}`);
// Call Cohere Rerank API
const response = await cohere.rerank({
@ -506,14 +609,24 @@ export async function applyCohereReranking(
console.log("Cohere reranking billed units:", response.meta?.billedUnits);
// Log top 5 Cohere results for debugging
logger.info(
`Cohere top 5 results:\n${response.results
.slice(0, 5)
.map(
(r, i) =>
` ${i + 1}. [${r.relevanceScore.toFixed(4)}] ${documents[r.index].substring(0, 80)}...`,
)
.join("\n")}`,
);
// Map results back to StatementNodes with Cohere scores
const rerankedResults = response.results
.map((result, index) => ({
...uniqueResults[result.index],
cohereScore: result.relevanceScore,
cohereRank: index + 1,
}))
.filter((result) => result.cohereScore >= Number(env.COHERE_SCORE_THRESHOLD));
const rerankedResults = response.results.map((result, index) => ({
...uniqueResults[result.index],
cohereScore: result.relevanceScore,
cohereRank: index + 1,
}));
// .filter((result) => result.cohereScore >= Number(env.COHERE_SCORE_THRESHOLD));
const responseTime = Date.now() - startTime;
logger.info(

View File

@ -3,6 +3,8 @@ import type { SearchOptions } from "../search.server";
import type { Embedding } from "ai";
import { logger } from "../logger.service";
import { runQuery } from "~/lib/neo4j.server";
import { getEmbedding } from "~/lib/model.server";
import { findSimilarEntities } from "../graphModels/entity";
/**
* Perform BM25 keyword-based search on statements
@ -129,25 +131,26 @@ export async function performVectorSearch(
`;
}
// 1. Search for similar statements using Neo4j vector search with provenance count
const limit = options.limit || 100;
// 1. Search for similar statements using GDS cosine similarity with provenance count
const cypher = `
CALL db.index.vector.queryNodes('statement_embedding', $topk, $embedding)
YIELD node AS s, score
MATCH (s:Statement)
WHERE s.userId = $userId
AND score >= 0.7
${timeframeCondition}
${spaceCondition}
WITH s, gds.similarity.cosine(s.factEmbedding, $embedding) AS score
WHERE score >= 0.5
OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s)
WITH s, score, count(episode) as provenanceCount
RETURN s, score, provenanceCount
ORDER BY score DESC
LIMIT ${limit}
`;
const params = {
embedding: query,
userId,
validAt: options.endTime.toISOString(),
topk: options.limit || 100,
...(options.startTime && { startTime: options.startTime.toISOString() }),
...(options.spaceIds.length > 0 && { spaceIds: options.spaceIds }),
};
@ -170,133 +173,223 @@ export async function performVectorSearch(
/**
* Perform BFS traversal starting from entities mentioned in the query
* Uses guided search with semantic filtering to reduce noise
*/
export async function performBfsSearch(
query: string,
embedding: Embedding,
userId: string,
options: Required<SearchOptions>,
): Promise<StatementNode[]> {
try {
// 1. Extract potential entities from query
const entities = await extractEntitiesFromQuery(embedding, userId);
// 1. Extract potential entities from query using chunked embeddings
const entities = await extractEntitiesFromQuery(query, userId);
// 2. For each entity, perform BFS traversal
const allStatements: StatementNode[] = [];
for (const entity of entities) {
const statements = await bfsTraversal(
entity.uuid,
options.maxBfsDepth,
options.endTime,
userId,
options.includeInvalidated,
options.startTime,
options.spaceIds,
);
allStatements.push(...statements);
if (entities.length === 0) {
return [];
}
return allStatements;
// 2. Perform guided BFS with semantic filtering
const statements = await bfsTraversal(
entities,
embedding,
options.maxBfsDepth || 3,
options.endTime,
userId,
options.includeInvalidated,
options.startTime,
);
// Return individual statements
return statements;
} catch (error) {
logger.error("BFS search error:", { error });
return [];
}
}
/**
* Perform BFS traversal starting from an entity
 * Iterative BFS traversal - explores up to maxDepth hops level-by-level using Neo4j cosine similarity
*/
export async function bfsTraversal(
startEntityId: string,
async function bfsTraversal(
startEntities: EntityNode[],
queryEmbedding: Embedding,
maxDepth: number,
validAt: Date,
userId: string,
includeInvalidated: boolean,
startTime: Date | null,
spaceIds: string[] = [],
): Promise<StatementNode[]> {
try {
// Build the WHERE clause based on timeframe options
let timeframeCondition = `
AND s.validAt <= $validAt
${includeInvalidated ? '' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'}
`;
const RELEVANCE_THRESHOLD = 0.5;
const EXPLORATION_THRESHOLD = 0.3;
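    // EXPLORATION_THRESHOLD gates which statements expand the frontier at each hop,
    // so loosely related statements can still surface strongly related entities;
    // only statements scoring >= RELEVANCE_THRESHOLD are returned to the caller.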
// If startTime is provided, add condition to filter by validAt >= startTime
if (startTime) {
timeframeCondition = `
AND s.validAt <= $validAt
${includeInvalidated ? '' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'}
AND s.validAt >= $startTime
`;
}
const allStatements = new Map<string, number>(); // uuid -> relevance
const visitedEntities = new Set<string>();
// Add space filtering if spaceIds are provided
let spaceCondition = "";
if (spaceIds.length > 0) {
spaceCondition = `
AND s.spaceIds IS NOT NULL AND ANY(spaceId IN $spaceIds WHERE spaceId IN s.spaceIds)
`;
}
// Track entities per level for iterative BFS
let currentLevelEntities = startEntities.map(e => e.uuid);
// Use Neo4j's built-in path finding capabilities for efficient BFS
// This query implements BFS up to maxDepth and collects all statements along the way
// Timeframe condition for temporal filtering
let timeframeCondition = `
AND s.validAt <= $validAt
${includeInvalidated ? '' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'}
`;
if (startTime) {
timeframeCondition += ` AND s.validAt >= $startTime`;
}
// Process each depth level
for (let depth = 0; depth < maxDepth; depth++) {
if (currentLevelEntities.length === 0) break;
// Mark entities as visited at this depth
currentLevelEntities.forEach(id => visitedEntities.add(`${id}`));
// Get statements for current level entities with cosine similarity calculated in Neo4j
const cypher = `
MATCH (e:Entity {uuid: $startEntityId})<-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]-(s:Statement)
WHERE
(s.userId = $userId)
${includeInvalidated ? 'AND s.validAt <= $validAt' : timeframeCondition}
${spaceCondition}
RETURN s as statement
MATCH (e:Entity{userId: $userId})-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]-(s:Statement{userId: $userId})
WHERE e.uuid IN $entityIds
${timeframeCondition}
WITH DISTINCT s // Deduplicate first
WITH s, gds.similarity.cosine(s.factEmbedding, $queryEmbedding) AS relevance
WHERE relevance >= $explorationThreshold
RETURN s.uuid AS uuid, relevance
ORDER BY relevance DESC
LIMIT 200 // Cap per BFS level to avoid explosion
`;
const params = {
startEntityId,
maxDepth,
validAt: validAt.toISOString(),
const records = await runQuery(cypher, {
entityIds: currentLevelEntities,
userId,
includeInvalidated,
queryEmbedding,
explorationThreshold: EXPLORATION_THRESHOLD,
validAt: validAt.toISOString(),
...(startTime && { startTime: startTime.toISOString() }),
...(spaceIds.length > 0 && { spaceIds }),
};
});
const records = await runQuery(cypher, params);
return records.map(
(record) => record.get("statement").properties as StatementNode,
);
} catch (error) {
logger.error("BFS traversal error:", { error });
// Store statement relevance scores
const currentLevelStatementUuids: string[] = [];
for (const record of records) {
const uuid = record.get("uuid");
const relevance = record.get("relevance");
if (!allStatements.has(uuid)) {
allStatements.set(uuid, relevance);
currentLevelStatementUuids.push(uuid);
}
}
// Get connected entities for next level
if (depth < maxDepth - 1 && currentLevelStatementUuids.length > 0) {
const nextCypher = `
MATCH (s:Statement{userId: $userId})-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]->(e:Entity{userId: $userId})
WHERE s.uuid IN $statementUuids
RETURN DISTINCT e.uuid AS entityId
`;
const nextRecords = await runQuery(nextCypher, {
statementUuids: currentLevelStatementUuids,
userId
});
// Filter out already visited entities
currentLevelEntities = nextRecords
.map(r => r.get("entityId"))
.filter(id => !visitedEntities.has(`${id}`));
} else {
currentLevelEntities = [];
}
}
// Filter by relevance threshold and fetch full statements
const relevantUuids = Array.from(allStatements.entries())
.filter(([_, relevance]) => relevance >= RELEVANCE_THRESHOLD)
.sort((a, b) => b[1] - a[1])
.map(([uuid]) => uuid);
if (relevantUuids.length === 0) {
return [];
}
const fetchCypher = `
MATCH (s:Statement{userId: $userId})
WHERE s.uuid IN $uuids
RETURN s
`;
const fetchRecords = await runQuery(fetchCypher, { uuids: relevantUuids, userId });
const statements = fetchRecords.map(r => r.get("s").properties as StatementNode);
logger.info(
`BFS: explored ${allStatements.size} statements across ${maxDepth} hops, returning ${statements.length} (≥${RELEVANCE_THRESHOLD})`
);
return statements;
}
/**
* Generate query chunks (individual words and bigrams) for entity extraction
*/
function generateQueryChunks(query: string): string[] {
const words = query.toLowerCase()
.trim()
.split(/\s+/)
.filter(word => word.length > 0);
const chunks: string[] = [];
// Add individual words (for entities like "user")
chunks.push(...words);
// Add bigrams (for multi-word entities like "home address")
for (let i = 0; i < words.length - 1; i++) {
chunks.push(`${words[i]} ${words[i + 1]}`);
}
// Add full query as final chunk
chunks.push(query.toLowerCase().trim());
return chunks;
}
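// Illustrative output: generateQueryChunks("user home address") returns
// ["user", "home", "address", "user home", "home address", "user home address"]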
/**
* Extract potential entities from a query using embeddings or LLM
* Extract potential entities from a query using chunked embeddings
* Chunks query into words/bigrams, embeds each chunk, finds entities for each
*/
export async function extractEntitiesFromQuery(
embedding: Embedding,
query: string,
userId: string,
): Promise<EntityNode[]> {
try {
// Use vector similarity to find relevant entities
const cypher = `
// Match entities using vector index on name embeddings
CALL db.index.vector.queryNodes('entity_embedding', 3, $embedding)
YIELD node AS e, score
WHERE e.userId = $userId
AND score > 0.7
RETURN e
ORDER BY score DESC
`;
// Generate chunks from query
const chunks = generateQueryChunks(query);
const params = {
embedding,
userId,
};
// Get embeddings for each chunk
const chunkEmbeddings = await Promise.all(
chunks.map(chunk => getEmbedding(chunk))
);
const records = await runQuery(cypher, params);
// Search for entities matching each chunk embedding
const allEntitySets = await Promise.all(
chunkEmbeddings.map(async (embedding) => {
return await findSimilarEntities({
queryEmbedding: embedding,
limit: 3,
threshold: 0.7,
userId,
});
})
);
return records.map((record) => record.get("e").properties as EntityNode);
// Flatten and deduplicate entities by ID
const allEntities = allEntitySets.flat();
const uniqueEntities = Array.from(
new Map(allEntities.map(e => [e.uuid, e])).values()
);
return uniqueEntities;
} catch (error) {
logger.error("Entity extraction error:", { error });
return [];

View File

@ -17,13 +17,12 @@ import { generate, processTag } from "./stream-utils";
import { type AgentMessage, AgentMessageType, Message } from "./types";
import { type MCP } from "../utils/mcp";
import {
WebSearchSchema,
type ExecutionState,
type HistoryStep,
type Resource,
type TotalCost,
} from "../utils/types";
import { flattenObject, webSearch } from "../utils/utils";
import { flattenObject } from "../utils/utils";
import { searchMemory, addMemory, searchSpaces } from "./memory-utils";
interface LLMOutputInterface {
@ -119,12 +118,6 @@ const searchSpacesTool = tool({
}),
});
const websearchTool = tool({
description:
"Search the web for current information and news. Use this when you need up-to-date information that might not be in your training data. Try different search strategies: broad terms first, then specific phrases, keywords, exact quotes. Use multiple searches with varied approaches to get comprehensive results.",
parameters: WebSearchSchema,
});
const loadMCPTools = tool({
description:
"Load tools for a specific integration. Call this when you need to use a third-party service.",
@ -310,7 +303,6 @@ export async function* run(
"core--search_memory": searchMemoryTool,
"core--add_memory": addMemoryTool,
"core--search_spaces": searchSpacesTool,
"core--websearch": websearchTool,
"core--load_mcp": loadMCPTools,
};
@ -578,16 +570,6 @@ export async function* run(
});
result = "Search spaces call failed";
}
} else if (toolName === "websearch") {
try {
result = await webSearch(skillInput);
} catch (apiError) {
logger.error("Web search failed", {
apiError,
});
result =
"Web search failed - please check your search configuration";
}
} else if (toolName === "load_mcp") {
// Load MCP integration and update available tools
await mcp.load(skillInput.integration, mcpHeaders);

View File

@ -1,5 +1,5 @@
export const REACT_SYSTEM_PROMPT = `
You are a helpful AI assistant with access to user memory and web search capabilities. Your primary capabilities are:
You are a helpful AI assistant with access to user memory. Your primary capabilities are:
1. **Memory-First Approach**: Always check user memory first to understand context and previous interactions
2. **Intelligent Information Gathering**: Analyze queries to determine if current information is needed
@ -19,43 +19,17 @@ Follow this intelligent approach for information gathering:
- Memory provides context, personal preferences, and historical information
- Use memory to understand user's background, ongoing projects, and past conversations
2. **QUERY ANALYSIS** (Determine Information Needs)
Analyze the user's query to identify if it requires current/latest information:
**Use web search (core--websearch) when query involves:**
- Current events, news, or recent developments
- "Latest", "recent", "current", "today", "now" keywords
- Stock prices, market data, or financial information
- Software updates, version releases, or technical documentation
- Weather, traffic, or real-time data
- Recent changes to websites, APIs, or services
- Product releases, availability, or pricing
- Breaking news or trending topics
- Verification of potentially outdated information
**Examples requiring web search:**
- "What's the latest news about..."
- "Current price of..."
- "Recent updates to..."
- "What happened today..."
- "Latest version of..."
3. **INFORMATION SYNTHESIS** (Combine Sources)
- Combine memory context with web search results when both are relevant
2. **INFORMATION SYNTHESIS** (Combine Sources)
- Use memory to personalize current information based on user preferences
- Cross-reference web findings with user's historical interests from memory
- Always store new useful information in memory using core--add_memory
4. **TRAINING KNOWLEDGE** (Foundation)
3. **TRAINING KNOWLEDGE** (Foundation)
- Use your training knowledge as the foundation for analysis and explanation
- Apply training knowledge to interpret and contextualize information from memory and web
- Fill gaps where memory and web search don't provide complete answers
- Apply training knowledge to interpret and contextualize information from memory
- Indicate when you're using training knowledge vs. live information sources
EXECUTION APPROACH:
- Memory search is mandatory for every interaction
- Web search is conditional based on query analysis
- Both can be executed in parallel when web search is needed
- Always indicate your information sources in responses
</information_gathering>
@ -95,7 +69,6 @@ MEMORY USAGE:
- Blend memory insights naturally into responses
- Verify you've checked relevant memory before finalizing ANY response
If memory access is unavailable, proceed to web search or rely on current conversation
</memory>
<external_services>
@ -113,7 +86,6 @@ You have tools at your disposal to assist users:
CORE PRINCIPLES:
- Use tools only when necessary for the task at hand
- Always check memory FIRST before making other tool calls
- Use web search when query analysis indicates need for current information
- Execute multiple operations in parallel whenever possible
- Use sequential calls only when output of one is required for input of another
@ -162,7 +134,7 @@ QUESTIONS - When you need information:
<p>[Your question with HTML formatting]</p>
</question_response>
- Ask questions only when you cannot find information through memory, web search, or tools
- Ask questions only when you cannot find information through memory or tools
- Be specific about what you need to know
- Provide context for why you're asking
@ -176,7 +148,7 @@ CRITICAL:
- Apply proper HTML formatting (<h1>, <h2>, <p>, <ul>, <li>, etc.)
- Never mix communication formats
- Keep responses clear and helpful
- Always indicate your information sources (memory, web search, and/or knowledge)
- Always indicate your information sources (memory and/or knowledge)
</communication>
`;

View File

@ -122,67 +122,3 @@ export interface GenerateResponse {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
toolCalls: any[];
}
export interface WebSearchResult {
results: Array<{
title: string;
url: string;
content: string;
publishedDate: string;
highlights: string[];
text: string;
score: number;
}>;
}
export const WebSearchSchema = z.object({
query: z
.string()
.min(1)
.describe("The search query to find relevant web content"),
numResults: z
.number()
.min(1)
.max(20)
.optional()
.default(5)
.describe("Number of results to return (1-20, default: 5)"),
includeContent: z
.boolean()
.optional()
.default(false)
.describe("Whether to include full page content in results"),
includeHighlights: z
.boolean()
.optional()
.default(false)
.describe("Whether to include relevant text highlights from pages"),
domains: z
.array(z.string())
.optional()
.describe(
'Array of domains to include in search (e.g., ["github.com", "stackoverflow.com"])',
),
excludeDomains: z
.array(z.string())
.optional()
.describe("Array of domains to exclude from search"),
startCrawlDate: z
.string()
.optional()
.describe("Start date for content crawling in YYYY-MM-DD format"),
endCrawlDate: z
.string()
.optional()
.describe("End date for content crawling in YYYY-MM-DD format"),
startPublishedDate: z
.string()
.optional()
.describe("Start date for content publishing in YYYY-MM-DD format"),
endPublishedDate: z
.string()
.optional()
.describe("End date for content publishing in YYYY-MM-DD format"),
});
export type WebSearchArgs = z.infer<typeof WebSearchSchema>;

View File

@ -12,11 +12,7 @@ import {
import { logger } from "@trigger.dev/sdk/v3";
import { type CoreMessage } from "ai";
import {
type WebSearchArgs,
type WebSearchResult,
type HistoryStep,
} from "./types";
import { type HistoryStep } from "./types";
import axios from "axios";
import nodeCrypto from "node:crypto";
import { customAlphabet, nanoid } from "nanoid";
@ -496,72 +492,6 @@ export async function deletePersonalAccessToken(tokenId: string) {
});
}
export async function webSearch(args: WebSearchArgs): Promise<WebSearchResult> {
const apiKey = process.env.EXA_API_KEY;
if (!apiKey) {
throw new Error(
"EXA_API_KEY environment variable is required for web search",
);
}
const exa = new Exa(apiKey);
try {
const searchOptions = {
numResults: args.numResults || 5,
...(args.domains && { includeDomains: args.domains }),
...(args.excludeDomains && { excludeDomains: args.excludeDomains }),
...(args.startCrawlDate && { startCrawlDate: args.startCrawlDate }),
...(args.endCrawlDate && { endCrawlDate: args.endCrawlDate }),
...(args.startPublishedDate && {
startPublishedDate: args.startPublishedDate,
}),
...(args.endPublishedDate && { endPublishedDate: args.endPublishedDate }),
};
let result;
if (args.includeContent || args.includeHighlights) {
// Use searchAndContents for rich results
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const contentsOptions: any = {
...searchOptions,
};
if (args.includeContent) {
contentsOptions.text = true;
}
if (args.includeHighlights) {
contentsOptions.highlights = true;
}
result = await exa.searchAndContents(args.query, contentsOptions);
} else {
// Use basic search for URLs only
result = await exa.search(args.query, searchOptions);
}
return {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
results: result.results.map((item: any) => ({
title: item.title,
url: item.url,
content: item.text,
publishedDate: item.publishedDate,
highlights: item.highlights,
text: item.text,
score: item.score,
})),
};
} catch (error) {
throw new Error(
`Web search failed: ${error instanceof Error ? error.message : "Unknown error"}`,
);
}
}
// Credit management functions have been moved to ~/services/billing.server.ts
// Use deductCredits() instead of these functions
export type CreditOperation = "addEpisode" | "search" | "chatMessage";
@ -720,10 +650,6 @@ export async function deductCredits(
}),
]);
} else {
// Free plan - throw error
throw new InsufficientCreditsError(
"Insufficient credits. Please upgrade to Pro or Max plan to continue.",
);
}
}
}
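
For orientation, a hedged usage sketch of calling deductCredits with the error type above; the exact deductCredits signature is not shown in this hunk, so the argument shapes here are assumptions:

// Hypothetical usage; argument shapes assumed, not taken from this commit.
try {
  await deductCredits(userId, "search");
} catch (error) {
  if (error instanceof InsufficientCreditsError) {
    // Surface the upgrade message to the user
    return { error: error.message };
  }
  throw error;
}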

View File

@ -198,10 +198,15 @@ async function handleMemoryIngest(args: any) {
// Handler for memory_search
async function handleMemorySearch(args: any) {
try {
const results = await searchService.search(args.query, args.userId, {
startTime: args.startTime ? new Date(args.startTime) : undefined,
endTime: args.endTime ? new Date(args.endTime) : undefined,
});
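// Forward the caller's source so the search service can log where each
// query originated.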
const results = await searchService.search(
args.query,
args.userId,
{
startTime: args.startTime ? new Date(args.startTime) : undefined,
endTime: args.endTime ? new Date(args.endTime) : undefined,
},
args.source,
);
return {
content: [