diff --git a/apps/webapp/app/services/search.server.ts b/apps/webapp/app/services/search.server.ts
index 7cda5df..2750f74 100644
--- a/apps/webapp/app/services/search.server.ts
+++ b/apps/webapp/app/services/search.server.ts
@@ -13,6 +13,7 @@ import {
 import { getEmbedding, makeModelCall } from "~/lib/model.server";
 import { prisma } from "~/db.server";
 import { runQuery } from "~/lib/neo4j.server";
+import { encode } from "gpt-tokenizer/encoding/o200k_base";
 
 /**
  * SearchService provides methods to search the reified + temporal knowledge graph
@@ -162,25 +163,6 @@ export class SearchService {
         `confidence: ${qualityFilter.confidence.toFixed(2)}`,
     );
 
-    // Log recall asynchronously (don't await to avoid blocking response)
-    const responseTime = Date.now() - startTime;
-    this.logRecallAsync(
-      query,
-      userId,
-      filteredResults.map((item) => item.statement),
-      opts,
-      responseTime,
-      source,
-    ).catch((error) => {
-      logger.error("Failed to log recall event:", error);
-    });
-
-    this.updateRecallCount(
-      userId,
-      episodes,
-      filteredResults.map((item) => item.statement),
-    );
-
     // Replace session episodes with compacts automatically
     const unifiedEpisodes = await this.replaceWithCompacts(episodes, userId);
 
@@ -191,6 +173,41 @@ export class SearchService {
       relevantScore: statement.score,
     }));
 
+    // Calculate response content for token counting
+    let responseContent: string;
+    if (opts.structured) {
+      responseContent = JSON.stringify({
+        episodes: unifiedEpisodes,
+        facts: factsData,
+      });
+    } else {
+      responseContent = this.formatAsMarkdown(unifiedEpisodes, factsData);
+    }
+
+    // Count tokens in the response with the o200k_base tokenizer
+    const tokenCount = encode(responseContent).length;
+
+    // Measure elapsed time (includes formatting and token counting) for the recall log
+    const responseTime = Date.now() - startTime;
+
+    this.updateRecallCount(
+      userId,
+      episodes,
+      filteredResults.map((item) => item.statement),
+    );
+
+    this.logRecallAsync(
+      query,
+      userId,
+      episodes.length,
+      opts,
+      responseTime,
+      source,
+      tokenCount,
+    ).catch((error) => {
+      logger.error("Failed to log recall event:", error);
+    });
+
     // Return markdown by default, structured JSON if requested
     if (opts.structured) {
       return {
@@ -200,47 +217,27 @@ export class SearchService {
     }
 
     // Return markdown formatted context
-    return this.formatAsMarkdown(unifiedEpisodes, factsData);
+    return responseContent;
   }
 
   private async logRecallAsync(
     query: string,
     userId: string,
-    results: StatementNode[],
+    episodeCount: number,
     options: Required<SearchOptions>,
     responseTime: number,
     source?: string,
+    tokenCount?: number,
   ): Promise<void> {
     try {
-      // Determine target type based on results
+      // Determine target type based on episode count
       let targetType = "mixed_results";
-      if (results.length === 1) {
-        targetType = "statement";
-      } else if (results.length === 0) {
+      if (episodeCount === 1) {
+        targetType = "episodic";
+      } else if (episodeCount === 0) {
         targetType = "no_results";
       }
 
-      // Calculate average similarity score if available
-      let averageSimilarityScore: number | null = null;
-      const scoresWithValues = results
-        .map((result) => {
-          // Try to extract score from various possible score fields
-          const score =
-            (result as any).rrfScore ||
-            (result as any).mmrScore ||
-            (result as any).crossEncoderScore ||
-            (result as any).finalScore ||
-            (result as any).score;
-          return score && typeof score === "number" ? score : null;
-        })
-        .filter((score): score is number => score !== null);
-
-      if (scoresWithValues.length > 0) {
-        averageSimilarityScore =
-          scoresWithValues.reduce((sum, score) => sum + score, 0) /
-          scoresWithValues.length;
-      }
-
       await prisma.recallLog.create({
         data: {
           accessType: "search",
@@ -249,8 +246,8 @@ export class SearchService {
           searchMethod: "hybrid", // BM25 + Vector + BFS
           minSimilarity: options.scoreThreshold,
           maxResults: options.limit,
-          resultCount: results.length,
-          similarityScore: averageSimilarityScore,
+          resultCount: episodeCount,
+          similarityScore: null,
           context: JSON.stringify({
             entityTypes: options.entityTypes,
             predicateTypes: options.predicateTypes,
@@ -262,12 +259,15 @@ export class SearchService {
           }),
           source: source ?? "search_api",
           responseTimeMs: responseTime,
+          metadata: {
+            tokenCount: tokenCount || 0,
+          },
           userId,
         },
       });
 
       logger.debug(
-        `Logged recall event for user ${userId}: ${results.length} results in ${responseTime}ms`,
+        `Logged recall event for user ${userId}: ${episodeCount} episodes, ${tokenCount} tokens in ${responseTime}ms`,
       );
     } catch (error) {
       logger.error("Error creating recall log entry:", { error });
diff --git a/apps/webapp/app/utils/mcp/memory.ts b/apps/webapp/app/utils/mcp/memory.ts
index 0bf2f28..9bc5792 100644
--- a/apps/webapp/app/utils/mcp/memory.ts
+++ b/apps/webapp/app/utils/mcp/memory.ts
@@ -17,7 +17,28 @@ const SearchParamsSchema = {
     query: {
       type: "string",
       description:
-        "Search query as a simple statement or question. Write what you want to find, not a command. GOOD: 'user preferences for code style' or 'previous bugs in authentication' or 'GitHub integration setup'. BAD: 'search for' or 'find me' or 'get the'. Just state the topic directly.",
+        "Search query optimized for knowledge graph retrieval. Choose the right query structure based on your search intent:\n\n" +
+        "1. **Entity-Centric Queries** (Best for graph search):\n" +
+        "   - ✅ GOOD: \"User's preferences for code style and formatting\"\n" +
+        "   - ✅ GOOD: \"Project authentication implementation decisions\"\n" +
+        "   - ❌ BAD: \"user code style\"\n" +
+        "   - Format: [Person/Project] + [relationship/attribute] + [context]\n\n" +
+        "2. **Multi-Entity Relationship Queries** (Excellent for episode graph):\n" +
+        "   - ✅ GOOD: \"User and team discussions about API design patterns\"\n" +
+        "   - ✅ GOOD: \"relationship between database schema and performance optimization\"\n" +
+        "   - ❌ BAD: \"user team api design\"\n" +
+        "   - Format: [Entity1] + [relationship type] + [Entity2] + [context]\n\n" +
+        "3. **Semantic Question Queries** (Good for vector search):\n" +
+        "   - ✅ GOOD: \"What causes authentication errors in production? What are the security requirements?\"\n" +
+        "   - ✅ GOOD: \"How does caching improve API response times compared to direct database queries?\"\n" +
+        "   - ❌ BAD: \"auth errors production\"\n" +
+        "   - Format: Complete natural questions with full context\n\n" +
+        "4. **Concept Exploration Queries** (Good for BFS traversal):\n" +
+        "   - ✅ GOOD: \"concepts and ideas related to database indexing and query optimization\"\n" +
+        "   - ✅ GOOD: \"topics connected to user authentication and session management\"\n" +
+        "   - ❌ BAD: \"database indexing concepts\"\n" +
+        "   - Format: [concept] + related/connected + [domain/context]\n\n" +
+        "Avoid keyword soup queries - use complete phrases with proper context for best results.",
     },
     validAt: {
       type: "string",