Fix: better ingest and recall episodes

Manoj K 2025-08-27 01:13:05 +05:30
parent bbcab15916
commit 906d2fc18a
18 changed files with 191 additions and 71132 deletions

View File

@@ -312,6 +312,18 @@ const initializeSchema = async () => {
  await runQuery(
    "CREATE INDEX entity_name IF NOT EXISTS FOR (n:Entity) ON (n.name)",
  );
+  await runQuery(
+    "CREATE INDEX entity_uuid IF NOT EXISTS FOR (n:Entity) ON (n.uuid)",
+  );
+  await runQuery(
+    "CREATE INDEX entity_type IF NOT EXISTS FOR (n:Entity) ON (n.type)",
+  );
+  await runQuery(
+    "CREATE INDEX entity_user_id IF NOT EXISTS FOR (n:Entity) ON (n.userId)",
+  );
+  await runQuery(
+    "CREATE INDEX statement_user_id IF NOT EXISTS FOR (n:Statement) ON (n.userId)",
+  );
  await runQuery(
    "CREATE INDEX cluster_user_id IF NOT EXISTS FOR (n:Cluster) ON (n.userId)",
  );
@@ -322,17 +334,17 @@ const initializeSchema = async () => {
  // Create vector indexes for semantic search (if using Neo4j 5.0+)
  await runQuery(`
    CREATE VECTOR INDEX entity_embedding IF NOT EXISTS FOR (n:Entity) ON n.nameEmbedding
-    OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
+    OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
  `);
  await runQuery(`
    CREATE VECTOR INDEX statement_embedding IF NOT EXISTS FOR (n:Statement) ON n.factEmbedding
-    OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
+    OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
  `);
  await runQuery(`
    CREATE VECTOR INDEX episode_embedding IF NOT EXISTS FOR (n:Episode) ON n.contentEmbedding
-    OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
+    OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
  `);
  // Create fulltext indexes for BM25 search
@@ -348,7 +360,7 @@ const initializeSchema = async () => {
  await runQuery(`
    CREATE FULLTEXT INDEX entity_name_index IF NOT EXISTS
-    FOR (n:Entity) ON EACH [n.name, n.description]
+    FOR (n:Entity) ON EACH [n.name]
    OPTIONS {
      indexConfig: {
        \`fulltext.analyzer\`: 'english'
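The vector indexes now declare 1024 dimensions plus explicit HNSW build parameters (ef_construction 400, m 32), which implies a switch to a 1024-wide embedding model; Neo4j will not index vectors of any other width, so mismatched embeddings silently disappear from search. A minimal guard sketch to catch mismatches at write time; EXPECTED_DIMENSIONS and assertEmbeddingWidth are illustrative names, not code from this repo:

// Sketch: fail loudly on embeddings that the 1024-dim indexes would skip.
const EXPECTED_DIMENSIONS = 1024;

function assertEmbeddingWidth(embedding: number[], label: string): number[] {
  if (embedding.length !== EXPECTED_DIMENSIONS) {
    throw new Error(
      `${label} embedding has ${embedding.length} dimensions, expected ${EXPECTED_DIMENSIONS}`,
    );
  }
  return embedding;
}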

View File

@@ -45,9 +45,13 @@ const { action, loader } = createActionApiRoute(
    );
    // Combine episodes and facts into context
-    const context = [...searchResults.episodes, ...searchResults.facts].join("\n\n");
-    console.log("Context:", context);
+    let context = [...searchResults.episodes].join("\n\n");
+    searchResults.facts.map((fact) => {
+      context += `\n\nfact: ${fact.fact}\n validAt: ${fact.validAt}`;
+    });
+    // console.log("Context:", context);
    if (!context.trim()) {
      return json({
@@ -57,32 +61,77 @@
    }
    // Generate answer using LLM
-    const prompt = `Based on the following context information, please answer the question. Be specific and concise, using only information from the provided context. If the context doesn't contain enough information to answer the question, say so.
-
-Context:
-${context}
-
-Question: ${body.question}
-
-Answer:`;
-
-    let generatedAnswer = "";
+    const prompt = `You are an analytical AI that reasons deeply about context before answering questions. Your task is to:
+
+1. FIRST: Look for direct, explicit answers in the context
+2. ANALYZE the context thoroughly for relevant information
+3. IDENTIFY patterns, connections, and implications
+4. REASON about what the context suggests or implies
+5. ANSWER based on direct evidence OR analysis
+
+<reasoning>
+- Scan through ALL episodes and facts completely before answering
+- Look for every explicit statement that relates to the question
+- NEVER stop after finding the first answer - continue scanning for more
+- When asking "what did X show Y", look for ALL items X showed Y on that date
+- Collect multiple items, events, or details that answer the same question
+- If not found directly, identify all context elements related to the question
+- Look for patterns, themes, and implicit information in the context
+- Consider what the context suggests beyond explicit statements
+- Note any contradictions or missing information that affects the answer
+- Pay close attention to temporal information and dates (validAt timestamps)
+- For time-sensitive questions, prioritize more recent information
+- Consider the chronological sequence of events when relevant
+- CRITICAL: Ensure completeness by including ALL relevant items found
+- If you find 2+ items for the same question, mention them all in your answer
+- Be precise with details (specific types, colors, descriptions when available)
+- Draw logical conclusions based on available evidence
+- Don't give reasoning in the output
+</reasoning>
+
+Follow this output format. Don't wrap the JSON in \`\`\`json fences:
+<output>
+{"answer": "Your direct, short (max 2 sentences) answer based on your analysis"}
+</output>
+`;
+
+    const userPrompt = `<context>
+${context}
+</context>
+
+<question>
+Question: ${body.question}
+</question>
+`;
+
+    let responseText = "";
+    let generated_answer = "";
    try {
-      generatedAnswer = await makeModelCall(
+      await makeModelCall(
        false, // Don't stream
-        [{ role: "user", content: prompt }],
-        (_text: string, _model: string) => {
-          // onFinish callback - we can log model usage here if needed
-        }
-      ) as string;
+        [{ role: "system", content: prompt }, { role: "user", content: userPrompt }],
+        (text) => {
+          responseText = text;
+        },
+      );
+
+      const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
+      if (outputMatch && outputMatch[1]) {
+        try {
+          const parsedOutput = JSON.parse(outputMatch[1].trim());
+          generated_answer = parsedOutput.answer || "No answer provided";
+        } catch (jsonError) {
+          console.error("Error parsing JSON output:", jsonError);
+          generated_answer = outputMatch[1].trim();
+        }
+      }
    } catch (error) {
      console.error("Error generating answer:", error);
-      generatedAnswer = "I encountered an error while generating an answer to this question.";
+      generated_answer = "I encountered an error while generating an answer to this question.";
    }
    return json({
      question: body.question,
-      generated_answer: generatedAnswer.trim(),
+      generated_answer,
    });
  },
);
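The new context-building loop calls Array.map purely for its side effect on context. A behavior-equivalent sketch that builds the same string without mutation, using only the shapes already shown in this diff:

// Sketch: identical context string, no side-effect map().
const context = [
  ...searchResults.episodes,
  ...searchResults.facts.map(
    (fact) => `fact: ${fact.fact}\n validAt: ${fact.validAt}`,
  ),
].join("\n\n");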

View File

@@ -83,16 +83,15 @@ export async function findSimilarEntities(params: {
  userId: string;
}): Promise<EntityNode[]> {
  const query = `
-    MATCH (entity:Entity)
-    WHERE entity.nameEmbedding IS NOT NULL
-    WITH entity, vector.similarity.cosine($queryEmbedding, entity.nameEmbedding) AS score
+    CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
+    YIELD node AS entity, score
    WHERE score >= $threshold
      AND entity.userId = $userId
    RETURN entity, score
    ORDER BY score DESC
  `;
-  const result = await runQuery(query, params);
+  const result = await runQuery(query, { ...params, topK: params.limit });
  return result.map((record) => {
    const entity = record.get("entity").properties;
@@ -118,9 +117,8 @@ export async function findSimilarEntitiesWithSameType(params: {
  userId: string;
}): Promise<EntityNode[]> {
  const query = `
-    MATCH (entity:Entity)
-    WHERE entity.nameEmbedding IS NOT NULL
-    WITH entity, vector.similarity.cosine($queryEmbedding, entity.nameEmbedding) AS score
+    CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
+    YIELD node AS entity, score
    WHERE score >= $threshold
      AND entity.userId = $userId
      AND entity.type = $entityType
@@ -128,7 +126,7 @@ export async function findSimilarEntitiesWithSameType(params: {
    ORDER BY score DESC
  `;
-  const result = await runQuery(query, params);
+  const result = await runQuery(query, { ...params, topK: params.limit });
  return result.map((record) => {
    const entity = record.get("entity").properties;
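One caveat with db.index.vector.queryNodes: $topK is applied inside the index before the score/userId filters run, so on a multi-tenant graph a user can get back fewer than limit rows even when more of their entities would match. A hedged mitigation sketch, over-fetching by a constant factor; OVERFETCH is an assumption, not a repo constant, and query/params are as in findSimilarEntities above:

// Sketch: over-fetch from the index so post-filtering by userId/threshold
// still leaves roughly `limit` candidates, then trim back down.
const OVERFETCH = 4;
const result = await runQuery(query, {
  ...params,
  topK: params.limit * OVERFETCH,
});
const top = result.slice(0, params.limit);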

View File

@@ -137,16 +137,10 @@ export async function searchEpisodesByEmbedding(params: {
  minSimilarity?: number;
}) {
  const query = `
-    MATCH (episode:Episode)
+    CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
+    YIELD node AS episode, score
    WHERE episode.userId = $userId
-      AND episode.contentEmbedding IS NOT NULL
-    WITH episode,
-      CASE
-        WHEN size(episode.contentEmbedding) = size($embedding)
-        THEN vector.similarity.cosine($embedding, episode.contentEmbedding)
-        ELSE 0
-      END AS score
-    WHERE score >= $minSimilarity
+      AND score >= $minSimilarity
    RETURN episode, score
    ORDER BY score DESC`;
@@ -154,6 +148,7 @@ export async function searchEpisodesByEmbedding(params: {
    embedding: params.embedding,
    minSimilarity: params.minSimilarity,
    userId: params.userId,
+    topK: 100,
  });
  if (!result || result.length === 0) {
@@ -283,15 +278,10 @@ export async function getRelatedEpisodesEntities(params: {
  minSimilarity?: number;
}) {
  const query = `
-    MATCH (episode:Episode {userId: $userId})
-    WHERE episode.contentEmbedding IS NOT NULL
-    WITH episode,
-      CASE
-        WHEN size(episode.contentEmbedding) = size($embedding)
-        THEN vector.similarity.cosine($embedding, episode.contentEmbedding)
-        ELSE 0
-      END AS score
-    WHERE score >= $minSimilarity
+    CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
+    YIELD node AS episode, score
+    WHERE episode.userId = $userId
+      AND score >= $minSimilarity
    OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity)
    WHERE entity IS NOT NULL
    RETURN DISTINCT entity`;
@@ -300,6 +290,7 @@ export async function getRelatedEpisodesEntities(params: {
    embedding: params.embedding,
    minSimilarity: params.minSimilarity,
    userId: params.userId,
+    topK: params.limit || 100,
  });
  return result
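The removed CASE size(...) arms tolerated embeddings of mixed widths; the index-backed query assumes every stored vector already matches the 1024-dimension index, and mismatched vectors are simply absent from results. A sketch for finding episodes that would need re-embedding after the dimension change; the query is an assumption built from property names in this diff, not repo code:

// Sketch: list episodes whose stored embedding no longer matches the
// 1024-dim episode_embedding index, so they can be re-embedded.
const staleEpisodes = await runQuery(
  `
  MATCH (episode:Episode)
  WHERE episode.contentEmbedding IS NOT NULL
    AND size(episode.contentEmbedding) <> 1024
  RETURN episode.uuid AS uuid
  `,
  {},
);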

View File

@@ -110,11 +110,10 @@ export async function findContradictoryStatements({
  userId: string;
}): Promise<StatementNode[]> {
  const query = `
-    MATCH (statement:Statement)
+    MATCH (subject:Entity {uuid: $subjectId}), (predicate:Entity {uuid: $predicateId})
+    MATCH (subject)<-[:HAS_SUBJECT]-(statement:Statement)-[:HAS_PREDICATE]->(predicate)
    WHERE statement.userId = $userId
      AND statement.invalidAt IS NULL
-    MATCH (subject:Entity)<-[:HAS_SUBJECT]-(statement)-[:HAS_PREDICATE]->(predicate:Entity)
-    WHERE subject.uuid = $subjectId AND predicate.uuid = $predicateId
    RETURN statement
  `;
@@ -157,18 +156,21 @@ export async function findStatementsWithSameSubjectObject({
  userId: string;
}): Promise<StatementNode[]> {
  const query = `
-    MATCH (statement:Statement)
+    MATCH (subject:Entity {uuid: $subjectId}), (object:Entity {uuid: $objectId})
+    MATCH (subject)<-[:HAS_SUBJECT]-(statement:Statement)-[:HAS_OBJECT]->(object)
+    MATCH (statement)-[:HAS_PREDICATE]->(predicate:Entity)
    WHERE statement.userId = $userId
      AND statement.invalidAt IS NULL
-    MATCH (subject:Entity)<-[:HAS_SUBJECT]-(statement)-[:HAS_PREDICATE]->(predicate:Entity)
-    MATCH (statement)-[:HAS_OBJECT]->(object:Entity)
-    WHERE subject.uuid = $subjectId
-      AND object.uuid = $objectId
    ${excludePredicateId ? "AND predicate.uuid <> $excludePredicateId" : ""}
    RETURN statement
  `;
-  const params = { subjectId, objectId, userId, ...(excludePredicateId && { excludePredicateId }) };
+  const params = {
+    subjectId,
+    objectId,
+    userId,
+    ...(excludePredicateId && { excludePredicateId }),
+  };
  const result = await runQuery(query, params);
  if (!result || result.length === 0) {
@@ -207,13 +209,12 @@ export async function findSimilarStatements({
  userId: string;
}): Promise<StatementNode[]> {
  const query = `
-    MATCH (statement:Statement)
+    CALL db.index.vector.queryNodes('statement_embedding', $topK, $factEmbedding)
+    YIELD node AS statement, score
    WHERE statement.userId = $userId
      AND statement.invalidAt IS NULL
-      AND statement.factEmbedding IS NOT NULL
+      AND score >= $threshold
    ${excludeIds.length > 0 ? "AND NOT statement.uuid IN $excludeIds" : ""}
-    WITH statement, vector.similarity.cosine($factEmbedding, statement.factEmbedding) AS score
-    WHERE score >= $threshold
    RETURN statement, score
    ORDER BY score DESC
  `;
@@ -223,6 +224,7 @@ export async function findSimilarStatements({
    threshold,
    excludeIds,
    userId,
+    topK: 100,
  });
  if (!result || result.length === 0) {
@@ -396,17 +398,11 @@ export async function searchStatementsByEmbedding(params: {
  minSimilarity?: number;
}) {
  const query = `
-    MATCH (statement:Statement)
+    CALL db.index.vector.queryNodes('statement_embedding', $topK, $embedding)
+    YIELD node AS statement, score
    WHERE statement.userId = $userId
      AND statement.invalidAt IS NULL
-      AND statement.factEmbedding IS NOT NULL
-    WITH statement,
-      CASE
-        WHEN size(statement.factEmbedding) = size($embedding)
-        THEN vector.similarity.cosine($embedding, statement.factEmbedding)
-        ELSE 0
-      END AS score
-    WHERE score >= $minSimilarity
+      AND score >= $minSimilarity
    RETURN statement, score
    ORDER BY score DESC
  `;
@@ -416,6 +412,7 @@ export async function searchStatementsByEmbedding(params: {
    minSimilarity: params.minSimilarity,
    limit: params.limit,
    userId: params.userId,
+    topK: params.limit || 100,
  });
  if (!result || result.length === 0) {
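The rewritten statement lookups anchor on Entity {uuid: ...} before expanding to statements, which pairs with the new entity_uuid index added earlier in this commit. A quick sanity check that the planner actually starts from that index rather than scanning every Statement, as a sketch; PROFILE is standard Cypher, and the placeholder uuid is illustrative:

// Sketch: PROFILE the anchored pattern and inspect the plan for a
// NodeIndexSeek on :Entity(uuid) instead of a NodeByLabelScan.
await runQuery(
  `
  PROFILE
  MATCH (subject:Entity {uuid: $subjectId})<-[:HAS_SUBJECT]-(statement:Statement)
  RETURN count(statement)
  `,
  { subjectId: "00000000-0000-0000-0000-000000000000" },
);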

View File

@@ -112,17 +112,10 @@ export class KnowledgeGraphService {
      sessionContext,
    );
-    console.log("Normalized episode body:", normalizedEpisodeBody);
-    const relatedEpisodesEntities = await getRelatedEpisodesEntities({
-      embedding: await this.getEmbedding(normalizedEpisodeBody),
-      userId: params.userId,
-      minSimilarity: 0.7,
-    });
+    const normalizedTime = Date.now() - startTime;
+    logger.log(`Normalized episode body in ${normalizedTime} ms`);

-    if (
-      normalizedEpisodeBody === "NOTHING_TO_REMEMBER" ||
-      normalizedEpisodeBody === ""
-    ) {
+    if (normalizedEpisodeBody === "NOTHING_TO_REMEMBER") {
      logger.log("Nothing to remember");
      return {
        episodeUuid: null,
@@ -131,6 +124,17 @@ export class KnowledgeGraphService {
      };
    }

+    const relatedEpisodesEntities = await getRelatedEpisodesEntities({
+      embedding: await this.getEmbedding(normalizedEpisodeBody),
+      userId: params.userId,
+      minSimilarity: 0.7,
+    });
+
+    const relatedTime = Date.now();
+    logger.log(
+      `Related episodes entities in ${relatedTime - normalizedTime} ms`,
+    );
+
    // Step 2: Episode Creation - Create or retrieve the episode
    const episode: EpisodicNode = {
      uuid: crypto.randomUUID(),
@@ -153,6 +157,9 @@ export class KnowledgeGraphService {
      previousEpisodes,
    );

+    const extractedTime = Date.now();
+    logger.log(`Extracted entities in ${extractedTime - relatedTime} ms`);
+
    // Step 3.1: Context-aware entity resolution with preset type evolution
    await this.resolveEntitiesWithContext(
      extractedNodes,
@@ -165,15 +172,9 @@ export class KnowledgeGraphService {
      episode,
    );
-    console.log(
-      "Categorized entities:",
-      categorizedEntities.primary.map(
-        (entity) => `primary: ${entity.name} - ${entity.type}`,
-      ),
-      categorizedEntities.expanded.map(
-        (entity) => `expanded: ${entity.name} - ${entity.type}`,
-      ),
-    );
+    const expandedTime = Date.now();
+    logger.log(`Expanded entities in ${expandedTime - extractedTime} ms`);

    // Step 4: Statement Extraction - Extract statements (triples) instead of direct edges
    const extractedStatements = await this.extractStatements(
      episode,
@@ -181,6 +182,11 @@ export class KnowledgeGraphService {
      previousEpisodes,
    );

+    const extractedStatementsTime = Date.now();
+    logger.log(
+      `Extracted statements in ${extractedStatementsTime - expandedTime} ms`,
+    );
+
    // Step 5: Entity Resolution - Resolve extracted nodes to existing nodes or create new ones
    const resolvedTriples = await this.resolveExtractedNodes(
      extractedStatements,
@@ -188,6 +194,11 @@ export class KnowledgeGraphService {
      previousEpisodes,
    );

+    const resolvedTriplesTime = Date.now();
+    logger.log(
+      `Resolved Entities in ${resolvedTriplesTime - extractedStatementsTime} ms`,
+    );
+
    // Step 6: Statement Resolution - Resolve statements and detect contradictions
    const { resolvedStatements, invalidatedStatements } =
      await this.resolveStatements(
@@ -196,12 +207,22 @@ export class KnowledgeGraphService {
        previousEpisodes,
      );

+    const resolvedStatementsTime = Date.now();
+    logger.log(
+      `Resolved statements in ${resolvedStatementsTime - resolvedTriplesTime} ms`,
+    );
+
    // Step 7: Add attributes to entity nodes
    const updatedTriples = await this.addAttributesToEntities(
      resolvedStatements,
      episode,
    );

+    const updatedTriplesTime = Date.now();
+    logger.log(
+      `Updated triples in ${updatedTriplesTime - resolvedStatementsTime} ms`,
+    );
+
    for (const triple of updatedTriples) {
      const { subject, predicate, object, statement, provenance } = triple;
      const safeTriple = {
@@ -228,6 +249,9 @@ export class KnowledgeGraphService {
    // Save triples in parallel for better performance
    await Promise.all(updatedTriples.map((triple) => saveTriple(triple)));

+    const saveTriplesTime = Date.now();
+    logger.log(`Saved triples in ${saveTriplesTime - updatedTriplesTime} ms`);
+
    // Invalidate invalidated statements
    await invalidateStatements({ statementIds: invalidatedStatements });
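One unit mix-up in the timers above: normalizedTime holds a duration (Date.now() - startTime) while every later marker holds an absolute timestamp, so relatedTime - normalizedTime does not measure the related-entities step. A small step-timer sketch that keeps all markers in one unit; makeStepTimer is an illustrative helper, not repo code:

// Sketch: one monotonic step timer; each call logs the time since the
// previous call, so every delta compares like with like.
function makeStepTimer(log: (msg: string) => void) {
  let last = Date.now();
  return (label: string) => {
    const now = Date.now();
    log(`${label} in ${now - last} ms`);
    last = now;
  };
}

// Assumed wiring: const step = makeStepTimer((m) => logger.log(m));
// step("Normalized episode body"); step("Related episodes entities"); ...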

View File

@@ -39,7 +39,7 @@ export class SearchService {
    query: string,
    userId: string,
    options: SearchOptions = {},
-  ): Promise<{ episodes: string[]; facts: string[]; relatedFacts: string[] }> {
+  ): Promise<{ episodes: string[]; facts: { fact: string; validAt: Date }[] }> {
    const startTime = Date.now();
    // Default options
@@ -79,7 +79,6 @@ export class SearchService {
    // // 3. Apply adaptive filtering based on score threshold and minimum count
    const filteredResults = this.applyAdaptiveFiltering(rankedStatements, opts);
-    // const filteredResults = rankedStatements;
    // 3. Return top results
    const episodes = await getEpisodesByStatements(filteredResults);
@@ -97,22 +96,25 @@ export class SearchService {
    // Log recall asynchronously (don't await to avoid blocking response)
    const responseTime = Date.now() - startTime;
-    this.logRecallAsync(
-      query,
-      userId,
-      filteredResults,
-      opts,
-      responseTime,
-    ).catch((error) => {
-      logger.error("Failed to log recall event:", error);
-    });
+    logger.info(`Search completed in ${responseTime}ms for query: ${query}`);
+    // this.logRecallAsync(
+    //   query,
+    //   userId,
+    //   filteredResults,
+    //   opts,
+    //   responseTime,
+    // ).catch((error) => {
+    //   logger.error("Failed to log recall event:", error);
+    // });
    // this.updateRecallCount(userId, episodes, filteredResults);
    return {
      episodes: episodes.map((episode) => episode.content),
-      facts: filteredResults.map((statement) => statement.fact),
-      relatedFacts: relatedFacts.map((fact) => fact.fact),
+      facts: filteredResults.map((statement) => ({
+        fact: statement.fact,
+        validAt: statement.validAt,
+      })),
    };
  }
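Since facts changed shape from string[] to { fact, validAt }[] and relatedFacts was dropped, every caller (including the QA route above) now depends on this structure. A sketch of naming the contract once so call sites stay in sync; SearchResult is an assumed name, not from this repo:

// Sketch: shared result type for SearchService.search() and its callers.
export interface SearchResult {
  episodes: string[];
  facts: { fact: string; validAt: Date }[];
}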

View File

@@ -515,9 +515,6 @@ export async function applyCohereReranking(
    .filter((result) => result.cohereScore > 0.3);

  const responseTime = Date.now() - startTime;
-  console.log(
-    rerankedResults.map((result) => `${result.fact} - ${result.cohereScore}`),
-  );
  logger.info(
    `Cohere reranking completed: ${rerankedResults.length} results returned in ${responseTime}ms`,
  );

View File

@@ -131,23 +131,23 @@
  // 1. Search for similar statements using Neo4j vector search with provenance count
  const cypher = `
-    MATCH (s:Statement)
-    WHERE
-      (s.userId = $userId)
-      ${timeframeCondition}
-      ${spaceCondition}
-    WITH s, vector.similarity.cosine(s.factEmbedding, $embedding) AS score
-    WHERE score > 0.7
+    CALL db.index.vector.queryNodes('statement_embedding', $topk, $embedding)
+    YIELD node AS s, score
+    WHERE s.userId = $userId
+      AND score >= 0.7
+      ${timeframeCondition.replace("AND", "AND").replace("WHERE", "AND")}
+      ${spaceCondition}
    OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s)
    WITH s, score, count(episode) as provenanceCount
    RETURN s, score, provenanceCount
    ORDER BY score DESC
  `;
  const params = {
    embedding: query,
    userId,
    validAt: options.endTime.toISOString(),
+    topk: options.limit || 100,
    ...(options.startTime && { startTime: options.startTime.toISOString() }),
    ...(options.spaceIds.length > 0 && { spaceIds: options.spaceIds }),
  };
@@ -281,15 +281,13 @@ export async function extractEntitiesFromQuery(
  try {
    // Use vector similarity to find relevant entities
    const cypher = `
-      // Match entities using vector similarity on name embeddings
-      MATCH (e:Entity)
-      WHERE e.nameEmbedding IS NOT NULL
-        AND e.userId = $userId
-      WITH e, vector.similarity.cosine(e.nameEmbedding, $embedding) AS score
-      WHERE score > 0.7
+      // Match entities using vector index on name embeddings
+      CALL db.index.vector.queryNodes('entity_embedding', 3, $embedding)
+      YIELD node AS e, score
+      WHERE e.userId = $userId
+        AND score > 0.7
      RETURN e
      ORDER BY score DESC
-      LIMIT 3
    `;
    const params = {
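In the statement query above, timeframeCondition.replace("AND", "AND") is a no-op, and String.replace with a string pattern rewrites only the first occurrence of "WHERE", so the patching only holds if the condition contains at most one WHERE. A sketch of emitting the clause in AND-form for the index path instead of string-patching it; the property names follow the $validAt/$startTime params above, but the exact clause is an assumption:

// Sketch: build the timeframe clause directly in AND-form for queries that
// already opened their WHERE via the vector-index YIELD.
function buildTimeframeCondition(hasStartTime: boolean): string {
  return hasStartTime
    ? "AND s.validAt >= datetime($startTime) AND s.validAt <= datetime($validAt)"
    : "AND s.validAt <= datetime($validAt)";
}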

View File

@@ -1,2 +0,0 @@
BASE_URL=https://core.heysol.ai
API_KEY=

View File

@@ -1,423 +0,0 @@
#!/usr/bin/env node
const fs = require("fs");
const path = require("path");
const axios = require("axios");
/**
* LOCOMO Q&A Evaluation Script
* Evaluates question answering against ingested LOCOMO conversations
* Assumes conversations are already ingested via ingest_conversations.js
*/
class LocomoEvaluator {
constructor(baseUrl = "http://localhost:3033") {
this.baseUrl = baseUrl;
this.headers = {
Authorization: "Bearer rc_pat_kbc76ykt3gd81r6ctyeh8as5jryihbeqqvnsi2wt",
};
this.results = [];
// Create axios instance with default config
this.axios = axios.create({
baseURL: this.baseUrl,
headers: this.headers,
});
}
async makeRequest(endpoint, data) {
try {
const response = await this.axios.post(endpoint, data, {
headers: {
"Content-Type": "application/json",
},
});
return response.data;
} catch (error) {
if (error.response) {
throw new Error(`HTTP ${error.response.status}: ${JSON.stringify(error.response.data)}`);
} else if (error.request) {
throw new Error(`No response received: ${error.message}`);
} else {
throw new Error(`Request error: ${error.message}`);
}
}
}
async searchMemory(question, conversationId = null) {
try {
const response = await this.makeRequest("/api/v1/search", {
query: question,
});
return response;
} catch (error) {
console.error("Search error:", error.message);
return { results: [] };
}
}
async answerQuestion(question) {
try {
const response = await this.makeRequest("/api/v1/qa", {
question: question,
});
return response;
} catch (error) {
console.error("Q&A API error:", error.message);
return {
question: question,
generated_answer: "Error: Could not generate answer",
};
}
}
async evaluateAnswer(question, standardAnswer, generatedAnswer) {
const response = await this.makeRequest("/api/v1/evaluate", {
question,
standard_answer: standardAnswer,
generated_answer: generatedAnswer,
});
return {
label: response.label,
reasoning: response.reasoning,
matchRatio: response.matchRatio,
evaluationMethod: response.method,
};
}
async evaluateQuestion(question, expectedAnswer, evidence, conversationId, category) {
// NEW: Get generated answer from Q&A API
const qaResponse = await this.answerQuestion(question);
const generatedAnswer = qaResponse.generated_answer || "";
// NEW: Evaluate the generated answer against the expected answer
const evaluation = await this.evaluateAnswer(question, expectedAnswer, generatedAnswer);
return {
question,
expectedAnswer,
evidence,
category,
conversationId,
generatedAnswer: generatedAnswer,
evaluationResult: evaluation.label,
evaluationReasoning: evaluation.reasoning,
matchRatio: evaluation.matchRatio,
evaluationMethod: evaluation.evaluationMethod,
};
}
async evaluateConversation(conversation, conversationId) {
console.log(`Evaluating conversation ${conversationId}...`);
const batchSize = 15; // Process 15 questions concurrently
const qaResults = [];
const totalQuestions = conversation.qa.length;
let processed = 0;
console.log(`Processing ${totalQuestions} questions in batches of ${batchSize}...`);
for (let i = 0; i < totalQuestions; i += batchSize) {
const batch = conversation.qa.slice(i, i + batchSize);
const batchStartIndex = i;
console.log(
`Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(totalQuestions / batchSize)} (questions ${i + 1}-${Math.min(i + batchSize, totalQuestions)})`
);
// Create promises for the current batch
const batchPromises = batch.map(async (qa, batchIndex) => {
const questionIndex = batchStartIndex + batchIndex;
console.log(qa.question);
try {
const result = await this.evaluateQuestion(
qa.question,
qa.answer,
qa.evidence,
conversationId,
qa.category
);
return { result, index: questionIndex };
} catch (error) {
console.error(`Error evaluating question ${questionIndex + 1}:`, error.message);
return { error: error.message, index: questionIndex, qa };
}
});
// Process batch concurrently
const batchResults = await Promise.allSettled(batchPromises);
// Process results from this batch
batchResults.forEach((promiseResult) => {
if (promiseResult.status === "fulfilled") {
const { result, error, index, qa } = promiseResult.value;
if (result) {
qaResults.push(result);
} else if (error) {
// Add a placeholder result for failed evaluations
qaResults.push({
question: qa.question,
expectedAnswer: qa.answer ? qa.answer.toString() : qa.adversarial_answer.toString(),
evidence: qa.evidence,
category: qa.category,
conversationId,
error: error,
generatedAnswer: "Error: Evaluation failed",
evaluationResult: "ERROR",
evaluationReasoning: `Evaluation failed: ${error}`,
matchRatio: 0,
evaluationMethod: "error",
});
}
} else {
console.error(`Batch promise rejected:`, promiseResult.reason);
}
});
processed += batch.length;
console.log(` Completed ${processed}/${totalQuestions} questions`);
// Save results periodically (every batch or ~15 questions)
console.log(`Saving intermediate results...`);
this.saveResults();
// break;
}
console.log(`Completed evaluation of ${totalQuestions} questions`);
return qaResults;
}
async runEvaluation() {
console.log("Starting LOCOMO Q&A evaluation...");
// Load LOCOMO dataset
const dataPath = path.join(__dirname, "locomo10.json");
const conversations = JSON.parse(fs.readFileSync(dataPath, "utf8"));
console.log(`Loaded ${conversations.length} conversations for evaluation`);
// Evaluate each conversation
for (let i = 0; i < conversations.length; i++) {
const conversation = conversations[i];
const conversationId = `locomo_${i + 1}`;
if (i === 0) {
try {
const results = await this.evaluateConversation(conversation, conversationId);
this.results.push({
conversationId,
results,
totalQuestions: conversation.qa.length,
});
} catch (error) {
console.error(`Error evaluating conversation ${conversationId}:`, error.message);
}
}
}
// Save and summarize results
this.saveResults();
this.printDetailedSummary();
}
saveResults() {
const resultsPath = path.join(__dirname, "evaluation_results.json");
const timestamp = new Date().toISOString();
const output = {
timestamp,
summary: this.calculateSummaryStats(),
conversations: this.results,
};
fs.writeFileSync(resultsPath, JSON.stringify(output, null, 2));
console.log(`\nResults saved to ${resultsPath}`);
}
calculateSummaryStats() {
const totalQuestions = this.results.reduce((sum, conv) => sum + conv.totalQuestions, 0);
const questionsWithContext = this.results.reduce(
(sum, conv) => sum + conv.results.filter((r) => r.hasContext).length,
0
);
const questionsWithAnswerInContext = this.results.reduce(
(sum, conv) => sum + conv.results.filter((r) => r.answerInContext).length,
0
);
// NEW: Q&A evaluation statistics
const questionsWithGeneratedAnswers = this.results.reduce(
(sum, conv) =>
sum +
conv.results.filter(
(r) => r.generatedAnswer && r.generatedAnswer !== "Error: Could not generate answer"
).length,
0
);
const correctAnswers = this.results.reduce(
(sum, conv) => sum + conv.results.filter((r) => r.evaluationResult === "CORRECT").length,
0
);
const wrongAnswers = this.results.reduce(
(sum, conv) => sum + conv.results.filter((r) => r.evaluationResult === "WRONG").length,
0
);
const errorAnswers = this.results.reduce(
(sum, conv) => sum + conv.results.filter((r) => r.evaluationResult === "ERROR").length,
0
);
// Category breakdown
const categoryStats = {};
this.results.forEach((conv) => {
conv.results.forEach((result) => {
const cat = result.category || "unknown";
if (!categoryStats[cat]) {
categoryStats[cat] = {
total: 0,
withContext: 0,
withAnswer: 0,
withGenerated: 0,
correct: 0,
wrong: 0,
errors: 0,
};
}
categoryStats[cat].total++;
if (result.hasContext) categoryStats[cat].withContext++;
if (result.answerInContext) categoryStats[cat].withAnswer++;
if (
result.generatedAnswer &&
result.generatedAnswer !== "Error: Could not generate answer" &&
result.generatedAnswer !== "Error: Evaluation failed"
) {
categoryStats[cat].withGenerated++;
}
if (result.evaluationResult === "CORRECT") categoryStats[cat].correct++;
if (result.evaluationResult === "WRONG") categoryStats[cat].wrong++;
if (result.evaluationResult === "ERROR") categoryStats[cat].errors++;
});
});
return {
totalQuestions,
questionsWithContext,
questionsWithAnswerInContext,
contextRetrievalRate: ((questionsWithContext / totalQuestions) * 100).toFixed(1),
answerFoundRate: ((questionsWithAnswerInContext / totalQuestions) * 100).toFixed(1),
// NEW: Q&A evaluation metrics
questionsWithGeneratedAnswers,
correctAnswers,
wrongAnswers,
errorAnswers,
qaSuccessRate:
totalQuestions > 0
? ((questionsWithGeneratedAnswers / totalQuestions) * 100).toFixed(1)
: "0.0",
answerAccuracyRate:
questionsWithGeneratedAnswers > 0
? ((correctAnswers / questionsWithGeneratedAnswers) * 100).toFixed(1)
: "0.0",
categoryBreakdown: categoryStats,
};
}
printDetailedSummary() {
const stats = this.calculateSummaryStats();
console.log("\n=== LOCOMO EVALUATION RESULTS ===");
console.log(`Total conversations: ${this.results.length}`);
console.log(`Total questions: ${stats.totalQuestions}`);
console.log(
`Questions with retrieved context: ${stats.questionsWithContext}/${stats.totalQuestions} (${stats.contextRetrievalRate}%)`
);
console.log(
`Questions with answer in context: ${stats.questionsWithAnswerInContext}/${stats.totalQuestions} (${stats.answerFoundRate}%)`
);
console.log("\n=== Q&A EVALUATION RESULTS ===");
console.log(
`Questions with generated answers: ${stats.questionsWithGeneratedAnswers}/${stats.totalQuestions} (${stats.qaSuccessRate}%)`
);
console.log(
`Correct answers: ${stats.correctAnswers}/${stats.questionsWithGeneratedAnswers} (${stats.answerAccuracyRate}%)`
);
console.log(`Wrong answers: ${stats.wrongAnswers}/${stats.questionsWithGeneratedAnswers}`);
if (stats.errorAnswers > 0) {
console.log(`Evaluation errors: ${stats.errorAnswers}/${stats.totalQuestions}`);
}
console.log("\n=== CATEGORY BREAKDOWN ===");
Object.entries(stats.categoryBreakdown).forEach(([category, catStats]) => {
const retrievalRate = ((catStats.withAnswer / catStats.total) * 100).toFixed(1);
const qaRate =
catStats.withGenerated > 0
? ((catStats.withGenerated / catStats.total) * 100).toFixed(1)
: "0.0";
const accuracyRate =
catStats.withGenerated > 0
? ((catStats.correct / catStats.withGenerated) * 100).toFixed(1)
: "0.0";
console.log(`Category ${category}:`);
console.log(` Total questions: ${catStats.total}`);
console.log(
` Context retrieval: ${catStats.withAnswer}/${catStats.total} (${retrievalRate}%)`
);
console.log(` Generated answers: ${catStats.withGenerated}/${catStats.total} (${qaRate}%)`);
console.log(
` Answer accuracy: ${catStats.correct}/${catStats.withGenerated} (${accuracyRate}%)`
);
if (catStats.errors > 0) {
console.log(` Evaluation errors: ${catStats.errors}/${catStats.total}`);
}
});
console.log("\n=== PERFORMANCE INSIGHTS ===");
const avgContextLength =
this.results.reduce(
(sum, conv) => sum + conv.results.reduce((s, r) => s + r.contextLength, 0),
0
) / stats.totalQuestions;
console.log(`Average context length: ${avgContextLength.toFixed(0)} characters`);
const avgMatchRatio =
this.results.reduce(
(sum, conv) => sum + conv.results.reduce((s, r) => s + (r.matchRatio || 0), 0),
0
) / stats.totalQuestions;
console.log(`Average answer match ratio: ${avgMatchRatio.toFixed(3)}`);
// Show evaluation method breakdown
const evaluationMethods = {};
this.results.forEach((conv) => {
conv.results.forEach((result) => {
const method = result.evaluationMethod || "unknown";
evaluationMethods[method] = (evaluationMethods[method] || 0) + 1;
});
});
console.log("\n=== EVALUATION SUMMARY ===");
console.log(
"This evaluation measures both retrieval performance and answer generation accuracy."
);
console.log("Generated answers are evaluated against gold standard answers.");
console.log("\n=== EVALUATION METHODS USED ===");
Object.entries(evaluationMethods).forEach(([method, count]) => {
const percentage = ((count / stats.totalQuestions) * 100).toFixed(1);
console.log(`${method}: ${count}/${stats.totalQuestions} (${percentage}%)`);
});
}
}
// Command line interface
if (require.main === module) {
const evaluator = new LocomoEvaluator();
evaluator.runEvaluation().catch(console.error);
}
module.exports = LocomoEvaluator;

File diff suppressed because it is too large

View File

@@ -1,304 +0,0 @@
#!/usr/bin/env node
const fs = require("fs");
const path = require("path");
const axios = require("axios");
/**
* LOCOMO Conversation Ingestion Script
* Ingests LOCOMO conversations into C.O.R.E memory system
* Tracks ingestion status to avoid duplicates
*/
class LocomoIngester {
constructor() {
this.baseUrl = "http://3.95.55.23:3033";
this.headers = {
Authorization: `Bearer rc_pat_kbc76ykt3gd81r6ctyeh8as5jryihbeqqvnsi2wt`,
};
this.statusFile = path.join(__dirname, "ingestion_status.json");
// Create axios instance with default config
this.axios = axios.create({
baseURL: this.baseUrl,
headers: this.headers,
timeout: 10000, // 10 second timeout
});
}
async makeRequest(endpoint, data) {
try {
const response = await this.axios.post(endpoint, data, {
headers: {
"Content-Type": "application/json",
},
});
return response.data;
} catch (error) {
if (error.response) {
// Server responded with error status
throw new Error(`HTTP ${error.response.status}: ${JSON.stringify(error.response.data)}`);
} else if (error.request) {
// Request was made but no response received
throw new Error(`No response received: ${error.message}`);
} else {
// Something else happened
throw new Error(`Request error: ${error.message}`);
}
}
}
loadIngestionStatus() {
try {
if (fs.existsSync(this.statusFile)) {
return JSON.parse(fs.readFileSync(this.statusFile, "utf8"));
}
} catch (error) {
console.warn("Could not load ingestion status:", error.message);
}
return { conversations: {}, timestamp: null };
}
saveIngestionStatus(status) {
fs.writeFileSync(this.statusFile, JSON.stringify(status, null, 2));
}
async ingestConversation(conversation, conversationId, forceReingest = false) {
const status = this.loadIngestionStatus();
const sessionId =
Math.random().toString(36).substring(2, 15) + Math.random().toString(36).substring(2, 15);
if (status.conversations[conversationId] && !forceReingest) {
console.log(`Conversation ${conversationId} already ingested, skipping...`);
return false;
}
console.log(`BASE URL: ${this.baseUrl} ${process.env.BASE_URL}`);
console.log(`Ingesting conversation ${conversationId}...`);
const episodes = this.formatConversationForIngestion(conversation, conversationId);
let successCount = 0;
let errorCount = 0;
for (const [index, episode] of episodes.entries()) {
// if (index >= 0 && index < 20) {
try {
const payload = {
episodeBody: episode.content,
referenceTime: episode.metadata.timestamp,
source: "locomo_benchmark",
sessionId: `${sessionId}-${episode.metadata.sessionNumber}`,
};
await this.makeRequest("/api/v1/add", payload);
successCount++;
// Progress indicator
if ((index + 1) % 10 === 0) {
console.log(` Ingested ${index + 1}/${episodes.length} episodes`);
}
// Small delay to avoid overwhelming the system
await new Promise((resolve) => setTimeout(resolve, 50));
} catch (error) {
console.error(` Error ingesting episode ${index}:`, error.message);
errorCount++;
}
// }
}
// Update status
status.conversations[conversationId] = {
ingested: true,
timestamp: new Date().toISOString(),
totalEpisodes: episodes.length,
successCount,
errorCount,
};
status.timestamp = new Date().toISOString();
this.saveIngestionStatus(status);
console.log(` Completed: ${successCount} success, ${errorCount} errors`);
return true;
}
formatConversationForIngestion(conversation, conversationId) {
const episodes = [];
const conv = conversation.conversation;
// Extract speaker names
const speakerA = conv.speaker_a;
const speakerB = conv.speaker_b;
// Process each session
Object.keys(conv).forEach((key) => {
if (
key.startsWith("session_") &&
!key.endsWith("_date_time")
// ["session_1"].includes(key)
) {
console.log(`Processing session ${key}`);
const sessionNumber = key.replace("session_", "");
const sessionData = conv[key];
const sessionDateTime = conv[`session_${sessionNumber}_date_time`];
if (Array.isArray(sessionData)) {
sessionData.forEach((dialog, dialogIndex) => {
episodes.push({
content: `${dialog.speaker}: ${dialog.blip_caption ? `Shared ${dialog.blip_caption}. ${dialog.query}.` : ""} ${dialog.text}`,
metadata: {
conversationId,
sessionNumber: parseInt(sessionNumber),
dialogIndex,
dialogId: dialog.dia_id,
timestamp: sessionDateTime
? new Date(
Date.parse(
sessionDateTime.replace(
/(\d+):(\d+) (am|pm) on (\d+) (\w+), (\d+)/,
(_, hours, minutes, ampm, day, month, year) => {
const monthMap = {
January: 1,
Jan: 1,
February: 2,
Feb: 2,
March: 3,
Mar: 3,
April: 4,
Apr: 4,
May: 5,
June: 6,
Jun: 6,
July: 7,
Jul: 7,
August: 8,
Aug: 8,
September: 9,
Sep: 9,
October: 10,
Oct: 10,
November: 11,
Nov: 11,
December: 12,
Dec: 12,
};
const monthNum = monthMap[month] || 1;
return `${year}-${monthNum.toString().padStart(2, "0")}-${day.padStart(2, "0")} ${hours}:${minutes} ${ampm}`;
}
)
)
).toISOString()
: null,
speaker: dialog.speaker,
speakerA,
speakerB,
source: "locomo_benchmark",
},
});
});
}
}
});
return episodes;
}
async ingestAll(forceReingest = false) {
console.log("Starting LOCOMO conversation ingestion...");
if (forceReingest) {
console.log("Force re-ingestion enabled - will overwrite existing data");
}
// Load LOCOMO dataset
const dataPath = path.join(__dirname, "locomo10.json");
const conversations = JSON.parse(fs.readFileSync(dataPath, "utf8"));
console.log(`Loaded ${conversations.length} conversations`);
let ingestedCount = 0;
let skippedCount = 0;
// Ingest each conversation
for (let i = 0; i < conversations.length; i++) {
if (i === 4) {
const conversation = conversations[i];
const conversationId = `locomo_${i + 1}`;
try {
const wasIngested = await this.ingestConversation(
conversation,
conversationId,
forceReingest
);
if (wasIngested) {
ingestedCount++;
} else {
skippedCount++;
}
} catch (error) {
console.error(`Error with conversation ${conversationId}:`, error.message);
}
}
}
this.printSummary(ingestedCount, skippedCount);
}
printSummary(ingestedCount, skippedCount) {
console.log("\n=== INGESTION SUMMARY ===");
console.log(`Conversations ingested: ${ingestedCount}`);
console.log(`Conversations skipped: ${skippedCount}`);
console.log(`Status file: ${this.statusFile}`);
const status = this.loadIngestionStatus();
const totalEpisodes = Object.values(status.conversations).reduce(
(sum, conv) => sum + (conv.totalEpisodes || 0),
0
);
const totalSuccess = Object.values(status.conversations).reduce(
(sum, conv) => sum + (conv.successCount || 0),
0
);
console.log(`Total episodes ingested: ${totalSuccess}/${totalEpisodes}`);
console.log("\nReady for evaluation phase!");
}
getStatus() {
const status = this.loadIngestionStatus();
const conversations = Object.keys(status.conversations).length;
const totalEpisodes = Object.values(status.conversations).reduce(
(sum, conv) => sum + (conv.successCount || 0),
0
);
return {
conversations,
episodes: totalEpisodes,
lastIngestion: status.timestamp,
};
}
}
// Command line interface
if (require.main === module) {
const args = process.argv.slice(2);
const forceReingest = args.includes("--force");
const showStatus = args.includes("--status");
const ingester = new LocomoIngester();
if (showStatus) {
const status = ingester.getStatus();
console.log("LOCOMO Ingestion Status:");
console.log(` Conversations: ${status.conversations}`);
console.log(` Episodes: ${status.episodes}`);
console.log(` Last ingestion: ${status.lastIngestion || "Never"}`);
} else {
ingester.ingestAll(forceReingest).catch(console.error);
}
}
module.exports = LocomoIngester;

View File

@@ -1,265 +0,0 @@
#!/usr/bin/env node
const fs = require("fs");
const path = require("path");
const axios = require("axios");
/**
* LOCOMO Session Summary Ingestion Script
* Ingests LOCOMO session summaries - comprehensive and available for all conversations
* More efficient than full conversations while preserving all key information
*/
class LocomoSessionIngester {
constructor(baseUrl = process.env.BASE_URL) {
this.baseUrl = baseUrl;
this.headers = {
Authorization: `Bearer ${process.env.API_KEY}`,
};
this.statusFile = path.join(__dirname, "session_ingestion_status.json");
// Create axios instance with default config
this.axios = axios.create({
baseURL: this.baseUrl,
headers: this.headers,
timeout: 10000,
});
}
async makeRequest(endpoint, data) {
try {
const response = await this.axios.post(endpoint, data, {
headers: {
"Content-Type": "application/json",
},
});
return response.data;
} catch (error) {
if (error.response) {
throw new Error(`HTTP ${error.response.status}: ${JSON.stringify(error.response.data)}`);
} else if (error.request) {
throw new Error(`No response received: ${error.message}`);
} else {
throw new Error(`Request error: ${error.message}`);
}
}
}
loadIngestionStatus() {
try {
if (fs.existsSync(this.statusFile)) {
return JSON.parse(fs.readFileSync(this.statusFile, "utf8"));
}
} catch (error) {
console.warn("Could not load ingestion status:", error.message);
}
return { conversations: {}, timestamp: null };
}
saveIngestionStatus(status) {
fs.writeFileSync(this.statusFile, JSON.stringify(status, null, 2));
}
formatSessionSummaryForIngestion(conversation, conversationId) {
const episodes = [];
const sessionSummary = conversation.session_summary;
const conv = conversation.conversation;
const speakerA = conv.speaker_a;
const speakerB = conv.speaker_b;
// Process each session summary
Object.entries(sessionSummary).forEach(([sessionKey, summary]) => {
const sessionNumber = sessionKey.replace("session_", "").replace("_summary", "");
episodes.push({
content: `Session ${sessionNumber} Summary: ${summary}`,
metadata: {
conversationId,
sessionNumber: parseInt(sessionNumber),
speakerA,
speakerB,
source: "locomo_sessions",
type: "session_summary",
},
});
});
return episodes;
}
async ingestConversation(conversation, conversationId, forceReingest = false) {
const status = this.loadIngestionStatus();
if (status.conversations[conversationId] && !forceReingest) {
console.log(`Conversation ${conversationId} already ingested, skipping...`);
return false;
}
console.log(`Ingesting session summaries for conversation ${conversationId}...`);
const episodes = this.formatSessionSummaryForIngestion(conversation, conversationId);
let successCount = 0;
let errorCount = 0;
console.log(` Total sessions to ingest: ${episodes.length}`);
for (const [index, episode] of episodes.entries()) {
try {
const payload = {
episodeBody: episode.content,
referenceTime: new Date(Date.now() + index * 1000).toISOString(),
source: "locomo_sessions",
};
await this.makeRequest("/api/v1/add", payload);
successCount++;
// Progress indicator
if ((index + 1) % 10 === 0) {
console.log(` Ingested ${index + 1}/${episodes.length} sessions`);
}
// Small delay
await new Promise((resolve) => setTimeout(resolve, 100));
} catch (error) {
console.error(` Error ingesting session ${index}:`, error.message);
errorCount++;
}
}
// Update status
status.conversations[conversationId] = {
ingested: true,
timestamp: new Date().toISOString(),
totalEpisodes: episodes.length,
successCount,
errorCount,
};
status.timestamp = new Date().toISOString();
this.saveIngestionStatus(status);
console.log(` Completed: ${successCount} success, ${errorCount} errors`);
return true;
}
async ingestAll(forceReingest = false) {
console.log("Starting LOCOMO session summary ingestion...");
if (forceReingest) {
console.log("Force re-ingestion enabled");
}
// Load LOCOMO dataset
const dataPath = path.join(__dirname, "data", "locomo10.json");
const conversations = JSON.parse(fs.readFileSync(dataPath, "utf8"));
console.log(`Loaded ${conversations.length} conversations`);
let ingestedCount = 0;
let skippedCount = 0;
// Test connection first
try {
console.log("Testing connection...");
await this.makeRequest("/api/v1/add", {
episodeBody: "Session ingestion test",
referenceTime: new Date().toISOString(),
source: "test",
});
console.log("Connection test successful");
} catch (error) {
console.error("Connection test failed:", error.message);
return;
}
// Ingest all conversations
for (let i = 0; i < conversations.length; i++) {
const conversation = conversations[i];
const conversationId = `locomo_sessions_${i + 1}`;
if (i === 0) {
try {
const wasIngested = await this.ingestConversation(
conversation,
conversationId,
forceReingest
);
if (wasIngested) {
ingestedCount++;
} else {
skippedCount++;
}
} catch (error) {
console.error(`Error with conversation ${conversationId}:`, error.message);
}
}
}
this.printSummary(ingestedCount, skippedCount);
}
printSummary(ingestedCount, skippedCount) {
console.log("\n=== SESSION SUMMARY INGESTION ===");
console.log(`Conversations processed: ${ingestedCount}`);
console.log(`Conversations skipped: ${skippedCount}`);
const status = this.loadIngestionStatus();
const totalSessions = Object.values(status.conversations).reduce(
(sum, conv) => sum + (conv.totalEpisodes || 0),
0
);
const totalSuccess = Object.values(status.conversations).reduce(
(sum, conv) => sum + (conv.successCount || 0),
0
);
const totalErrors = Object.values(status.conversations).reduce(
(sum, conv) => sum + (conv.errorCount || 0),
0
);
console.log(`Total sessions ingested: ${totalSuccess}/${totalSessions}`);
console.log(
`Success rate: ${((totalSuccess / (totalSuccess + totalErrors || 1)) * 100).toFixed(1)}%`
);
console.log("\nReady for evaluation phase!");
console.log("Benefits: Fast ingestion, comprehensive summaries, all conversations covered");
}
getStatus() {
const status = this.loadIngestionStatus();
const conversations = Object.keys(status.conversations).length;
const totalSessions = Object.values(status.conversations).reduce(
(sum, conv) => sum + (conv.successCount || 0),
0
);
return {
conversations,
sessions: totalSessions,
lastIngestion: status.timestamp,
};
}
}
// Command line interface
if (require.main === module) {
const args = process.argv.slice(2);
const forceReingest = args.includes("--force");
const showStatus = args.includes("--status");
const ingester = new LocomoSessionIngester();
if (showStatus) {
const status = ingester.getStatus();
console.log("LOCOMO Session Ingestion Status:");
console.log(` Conversations: ${status.conversations}`);
console.log(` Sessions: ${status.sessions}`);
console.log(` Last ingestion: ${status.lastIngestion || "Never"}`);
} else {
ingester.ingestAll(forceReingest).catch(console.error);
}
}
module.exports = LocomoSessionIngester;

View File

@@ -1,40 +0,0 @@
{
"conversations": {
"locomo_1": {
"ingested": true,
"timestamp": "2025-08-22T14:47:13.572Z",
"totalEpisodes": 419,
"successCount": 419,
"errorCount": 0
},
"locomo_2": {
"ingested": true,
"timestamp": "2025-08-22T14:50:26.625Z",
"totalEpisodes": 369,
"successCount": 369,
"errorCount": 0
},
"locomo_3": {
"ingested": true,
"timestamp": "2025-08-22T14:55:30.262Z",
"totalEpisodes": 663,
"successCount": 663,
"errorCount": 0
},
"locomo_4": {
"ingested": true,
"timestamp": "2025-08-22T15:00:02.753Z",
"totalEpisodes": 629,
"successCount": 629,
"errorCount": 0
},
"locomo_5": {
"ingested": true,
"timestamp": "2025-08-22T15:04:17.340Z",
"totalEpisodes": 680,
"successCount": 680,
"errorCount": 0
}
},
"timestamp": "2025-08-22T15:04:17.340Z"
}

File diff suppressed because one or more lines are too long

View File

@@ -1,295 +0,0 @@
{
"name": "locomo",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "locomo",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"axios": "^1.11.0"
}
},
"node_modules/asynckit": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
"license": "MIT"
},
"node_modules/axios": {
"version": "1.11.0",
"resolved": "https://registry.npmjs.org/axios/-/axios-1.11.0.tgz",
"integrity": "sha512-1Lx3WLFQWm3ooKDYZD1eXmoGO9fxYQjrycfHFC8P0sCfQVXyROp0p9PFWBehewBOdCwHc+f/b8I0fMto5eSfwA==",
"license": "MIT",
"dependencies": {
"follow-redirects": "^1.15.6",
"form-data": "^4.0.4",
"proxy-from-env": "^1.1.0"
}
},
"node_modules/call-bind-apply-helpers": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
"integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"function-bind": "^1.1.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"license": "MIT",
"dependencies": {
"delayed-stream": "~1.0.0"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/delayed-stream": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"license": "MIT",
"engines": {
"node": ">=0.4.0"
}
},
"node_modules/dunder-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
"integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.1",
"es-errors": "^1.3.0",
"gopd": "^1.2.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-define-property": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
"integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-errors": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
"integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-object-atoms": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
"integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-set-tostringtag": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
"integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"get-intrinsic": "^1.2.6",
"has-tostringtag": "^1.0.2",
"hasown": "^2.0.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/follow-redirects": {
"version": "1.15.11",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz",
"integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==",
"funding": [
{
"type": "individual",
"url": "https://github.com/sponsors/RubenVerborgh"
}
],
"license": "MIT",
"engines": {
"node": ">=4.0"
},
"peerDependenciesMeta": {
"debug": {
"optional": true
}
}
},
"node_modules/form-data": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz",
"integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==",
"license": "MIT",
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.8",
"es-set-tostringtag": "^2.1.0",
"hasown": "^2.0.2",
"mime-types": "^2.1.12"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/function-bind": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
"integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/get-intrinsic": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
"integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.2",
"es-define-property": "^1.0.1",
"es-errors": "^1.3.0",
"es-object-atoms": "^1.1.1",
"function-bind": "^1.1.2",
"get-proto": "^1.0.1",
"gopd": "^1.2.0",
"has-symbols": "^1.1.0",
"hasown": "^2.0.2",
"math-intrinsics": "^1.1.0"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/get-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
"integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
"license": "MIT",
"dependencies": {
"dunder-proto": "^1.0.1",
"es-object-atoms": "^1.0.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/gopd": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
"integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/has-symbols": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
"integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/has-tostringtag": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
"integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
"license": "MIT",
"dependencies": {
"has-symbols": "^1.0.3"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/hasown": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
"license": "MIT",
"dependencies": {
"function-bind": "^1.1.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/math-intrinsics": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
"integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/mime-db": {
"version": "1.52.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
"license": "MIT",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/mime-types": {
"version": "2.1.35",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
"license": "MIT",
"dependencies": {
"mime-db": "1.52.0"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/proxy-from-env": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
"license": "MIT"
}
}
}

View File

@@ -1,23 +0,0 @@
{
"name": "locomo",
"version": "1.0.0",
"description": "**Authors**: [Adyasha Maharana](https://adymaharana.github.io/), [Dong-Ho Lee](https://www.danny-lee.info/), [Sergey Tulyakov](https://stulyakov.com/), [Mohit Bansal](https://www.cs.unc.edu/~mbansal/), [Francesco Barbieri](https://fvancesco.github.io/) and [Yuwei Fang](https://yuwfan.github.io/)",
"main": "debug_auth.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "git+https://github.com/snap-research/locomo.git"
},
"keywords": [],
"author": "",
"license": "ISC",
"bugs": {
"url": "https://github.com/snap-research/locomo/issues"
},
"homepage": "https://github.com/snap-research/locomo#readme",
"dependencies": {
"axios": "^1.11.0"
}
}