Fix: better ingest and recall episodes

Manoj K 2025-08-27 01:13:05 +05:30
parent bbcab15916
commit 906d2fc18a
18 changed files with 191 additions and 71132 deletions

View File

@@ -312,6 +312,18 @@ const initializeSchema = async () => {
  await runQuery(
    "CREATE INDEX entity_name IF NOT EXISTS FOR (n:Entity) ON (n.name)",
  );
+  await runQuery(
+    "CREATE INDEX entity_uuid IF NOT EXISTS FOR (n:Entity) ON (n.uuid)",
+  );
+  await runQuery(
+    "CREATE INDEX entity_type IF NOT EXISTS FOR (n:Entity) ON (n.type)",
+  );
+  await runQuery(
+    "CREATE INDEX entity_user_id IF NOT EXISTS FOR (n:Entity) ON (n.userId)",
+  );
+  await runQuery(
+    "CREATE INDEX statement_user_id IF NOT EXISTS FOR (n:Statement) ON (n.userId)",
+  );
  await runQuery(
    "CREATE INDEX cluster_user_id IF NOT EXISTS FOR (n:Cluster) ON (n.userId)",
  );
@@ -322,17 +334,17 @@ const initializeSchema = async () => {
  // Create vector indexes for semantic search (if using Neo4j 5.0+)
  await runQuery(`
    CREATE VECTOR INDEX entity_embedding IF NOT EXISTS FOR (n:Entity) ON n.nameEmbedding
-    OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
+    OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
  `);
  await runQuery(`
    CREATE VECTOR INDEX statement_embedding IF NOT EXISTS FOR (n:Statement) ON n.factEmbedding
-    OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
+    OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
  `);
  await runQuery(`
    CREATE VECTOR INDEX episode_embedding IF NOT EXISTS FOR (n:Episode) ON n.contentEmbedding
-    OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
+    OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
  `);
  // Create fulltext indexes for BM25 search
@@ -348,7 +360,7 @@ const initializeSchema = async () => {
  await runQuery(`
    CREATE FULLTEXT INDEX entity_name_index IF NOT EXISTS
-    FOR (n:Entity) ON EACH [n.name, n.description]
+    FOR (n:Entity) ON EACH [n.name]
    OPTIONS {
      indexConfig: {
        \`fulltext.analyzer\`: 'english'
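The vector indexes now declare 1024 dimensions plus explicit HNSW build parameters (ef_construction 400, m 32), which implies a switch to a 1024-wide embedding model; Neo4j will not index vectors of any other width, so mismatched embeddings silently disappear from search. A minimal guard sketch to catch mismatches at write time; EXPECTED_DIMENSIONS and assertEmbeddingWidth are illustrative names, not code from this repo:

// Sketch: fail loudly on embeddings that the 1024-dim indexes would skip.
const EXPECTED_DIMENSIONS = 1024;

function assertEmbeddingWidth(embedding: number[], label: string): number[] {
  if (embedding.length !== EXPECTED_DIMENSIONS) {
    throw new Error(
      `${label} embedding has ${embedding.length} dimensions, expected ${EXPECTED_DIMENSIONS}`,
    );
  }
  return embedding;
}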

View File

@@ -45,9 +45,13 @@ const { action, loader } = createActionApiRoute(
    );
    // Combine episodes and facts into context
-    const context = [...searchResults.episodes, ...searchResults.facts].join("\n\n");
-    console.log("Context:", context);
+    let context = [...searchResults.episodes].join("\n\n");
+    searchResults.facts.map((fact) => {
+      context += `\n\nfact: ${fact.fact}\n validAt: ${fact.validAt}`;
+    });
+    // console.log("Context:", context);
    if (!context.trim()) {
      return json({
@@ -57,32 +61,77 @@
    }
    // Generate answer using LLM
-    const prompt = `Based on the following context information, please answer the question. Be specific and concise, using only information from the provided context. If the context doesn't contain enough information to answer the question, say so.
-
-Context:
-${context}
-
-Question: ${body.question}
-
-Answer:`;
-
-    let generatedAnswer = "";
+    const prompt = `You are an analytical AI that reasons deeply about context before answering questions. Your task is to:
+
+1. FIRST: Look for direct, explicit answers in the context
+2. ANALYZE the context thoroughly for relevant information
+3. IDENTIFY patterns, connections, and implications
+4. REASON about what the context suggests or implies
+5. ANSWER based on direct evidence OR analysis
+
+<reasoning>
+- Scan through ALL episodes and facts completely before answering
+- Look for every explicit statement that relates to the question
+- NEVER stop after finding the first answer - continue scanning for more
+- When asking "what did X show Y", look for ALL items X showed Y on that date
+- Collect multiple items, events, or details that answer the same question
+- If not found directly, identify all context elements related to the question
+- Look for patterns, themes, and implicit information in the context
+- Consider what the context suggests beyond explicit statements
+- Note any contradictions or missing information that affects the answer
+- Pay close attention to temporal information and dates (validAt timestamps)
+- For time-sensitive questions, prioritize more recent information
+- Consider the chronological sequence of events when relevant
+- CRITICAL: Ensure completeness by including ALL relevant items found
+- If you find 2+ items for the same question, mention them all in your answer
+- Be precise with details (specific types, colors, descriptions when available)
+- Draw logical conclusions based on available evidence
+- Don't give reasoning in the output
+</reasoning>
+
+Follow this output format. Don't wrap the JSON in \`\`\`json fences:
+<output>
+{"answer": "Your direct, short (max 2 sentences) answer based on your analysis"}
+</output>
+`;
+
+    const userPrompt = `<context>
+${context}
+</context>
+
+<question>
+Question: ${body.question}
+</question>
+`;
+
+    let responseText = "";
+    let generated_answer = "";
    try {
-      generatedAnswer = await makeModelCall(
+      await makeModelCall(
        false, // Don't stream
-        [{ role: "user", content: prompt }],
-        (_text: string, _model: string) => {
-          // onFinish callback - we can log model usage here if needed
-        }
-      ) as string;
+        [{ role: "system", content: prompt }, { role: "user", content: userPrompt }],
+        (text) => {
+          responseText = text;
+        },
+      );
+
+      const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
+      if (outputMatch && outputMatch[1]) {
+        try {
+          const parsedOutput = JSON.parse(outputMatch[1].trim());
+          generated_answer = parsedOutput.answer || "No answer provided";
+        } catch (jsonError) {
+          console.error("Error parsing JSON output:", jsonError);
+          generated_answer = outputMatch[1].trim();
+        }
+      }
    } catch (error) {
      console.error("Error generating answer:", error);
-      generatedAnswer = "I encountered an error while generating an answer to this question.";
+      generated_answer = "I encountered an error while generating an answer to this question.";
    }
    return json({
      question: body.question,
-      generated_answer: generatedAnswer.trim(),
+      generated_answer,
    });
  },
);
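The new context-building loop calls Array.map purely for its side effect on context. A behavior-equivalent sketch that builds the same string without mutation, using only the shapes already shown in this diff:

// Sketch: identical context string, no side-effect map().
const context = [
  ...searchResults.episodes,
  ...searchResults.facts.map(
    (fact) => `fact: ${fact.fact}\n validAt: ${fact.validAt}`,
  ),
].join("\n\n");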

View File

@@ -83,16 +83,15 @@ export async function findSimilarEntities(params: {
  userId: string;
}): Promise<EntityNode[]> {
  const query = `
-    MATCH (entity:Entity)
-    WHERE entity.nameEmbedding IS NOT NULL
-    WITH entity, vector.similarity.cosine($queryEmbedding, entity.nameEmbedding) AS score
+    CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
+    YIELD node AS entity, score
    WHERE score >= $threshold
      AND entity.userId = $userId
    RETURN entity, score
    ORDER BY score DESC
  `;
-  const result = await runQuery(query, params);
+  const result = await runQuery(query, { ...params, topK: params.limit });
  return result.map((record) => {
    const entity = record.get("entity").properties;
@@ -118,9 +117,8 @@ export async function findSimilarEntitiesWithSameType(params: {
  userId: string;
}): Promise<EntityNode[]> {
  const query = `
-    MATCH (entity:Entity)
-    WHERE entity.nameEmbedding IS NOT NULL
-    WITH entity, vector.similarity.cosine($queryEmbedding, entity.nameEmbedding) AS score
+    CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
+    YIELD node AS entity, score
    WHERE score >= $threshold
      AND entity.userId = $userId
      AND entity.type = $entityType
@@ -128,7 +126,7 @@ export async function findSimilarEntitiesWithSameType(params: {
    ORDER BY score DESC
  `;
-  const result = await runQuery(query, params);
+  const result = await runQuery(query, { ...params, topK: params.limit });
  return result.map((record) => {
    const entity = record.get("entity").properties;
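One caveat with db.index.vector.queryNodes: $topK is applied inside the index before the score/userId filters run, so on a multi-tenant graph a user can get back fewer than limit rows even when more of their entities would match. A hedged mitigation sketch, over-fetching by a constant factor; OVERFETCH is an assumption, not a repo constant, and query/params are as in findSimilarEntities above:

// Sketch: over-fetch from the index so post-filtering by userId/threshold
// still leaves roughly `limit` candidates, then trim back down.
const OVERFETCH = 4;
const result = await runQuery(query, {
  ...params,
  topK: params.limit * OVERFETCH,
});
const top = result.slice(0, params.limit);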

View File

@@ -137,16 +137,10 @@ export async function searchEpisodesByEmbedding(params: {
  minSimilarity?: number;
}) {
  const query = `
-    MATCH (episode:Episode)
+    CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
+    YIELD node AS episode, score
    WHERE episode.userId = $userId
-      AND episode.contentEmbedding IS NOT NULL
-    WITH episode,
-      CASE
-        WHEN size(episode.contentEmbedding) = size($embedding)
-        THEN vector.similarity.cosine($embedding, episode.contentEmbedding)
-        ELSE 0
-      END AS score
-    WHERE score >= $minSimilarity
+      AND score >= $minSimilarity
    RETURN episode, score
    ORDER BY score DESC`;
@@ -154,6 +148,7 @@ export async function searchEpisodesByEmbedding(params: {
    embedding: params.embedding,
    minSimilarity: params.minSimilarity,
    userId: params.userId,
+    topK: 100,
  });
  if (!result || result.length === 0) {
@@ -283,15 +278,10 @@ export async function getRelatedEpisodesEntities(params: {
  minSimilarity?: number;
}) {
  const query = `
-    MATCH (episode:Episode {userId: $userId})
-    WHERE episode.contentEmbedding IS NOT NULL
-    WITH episode,
-      CASE
-        WHEN size(episode.contentEmbedding) = size($embedding)
-        THEN vector.similarity.cosine($embedding, episode.contentEmbedding)
-        ELSE 0
-      END AS score
-    WHERE score >= $minSimilarity
+    CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
+    YIELD node AS episode, score
+    WHERE episode.userId = $userId
+      AND score >= $minSimilarity
    OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity)
    WHERE entity IS NOT NULL
    RETURN DISTINCT entity`;
@@ -300,6 +290,7 @@ export async function getRelatedEpisodesEntities(params: {
    embedding: params.embedding,
    minSimilarity: params.minSimilarity,
    userId: params.userId,
+    topK: params.limit || 100,
  });
  return result
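The removed CASE size(...) arms tolerated embeddings of mixed widths; the index-backed query assumes every stored vector already matches the 1024-dimension index, and mismatched vectors are simply absent from results. A sketch for finding episodes that would need re-embedding after the dimension change; the query is an assumption built from property names in this diff, not repo code:

// Sketch: list episodes whose stored embedding no longer matches the
// 1024-dim episode_embedding index, so they can be re-embedded.
const staleEpisodes = await runQuery(
  `
  MATCH (episode:Episode)
  WHERE episode.contentEmbedding IS NOT NULL
    AND size(episode.contentEmbedding) <> 1024
  RETURN episode.uuid AS uuid
  `,
  {},
);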

View File

@@ -110,11 +110,10 @@ export async function findContradictoryStatements({
  userId: string;
}): Promise<StatementNode[]> {
  const query = `
-    MATCH (statement:Statement)
+    MATCH (subject:Entity {uuid: $subjectId}), (predicate:Entity {uuid: $predicateId})
+    MATCH (subject)<-[:HAS_SUBJECT]-(statement:Statement)-[:HAS_PREDICATE]->(predicate)
    WHERE statement.userId = $userId
      AND statement.invalidAt IS NULL
-    MATCH (subject:Entity)<-[:HAS_SUBJECT]-(statement)-[:HAS_PREDICATE]->(predicate:Entity)
-    WHERE subject.uuid = $subjectId AND predicate.uuid = $predicateId
    RETURN statement
  `;
@@ -157,18 +156,21 @@ export async function findStatementsWithSameSubjectObject({
  userId: string;
}): Promise<StatementNode[]> {
  const query = `
-    MATCH (statement:Statement)
+    MATCH (subject:Entity {uuid: $subjectId}), (object:Entity {uuid: $objectId})
+    MATCH (subject)<-[:HAS_SUBJECT]-(statement:Statement)-[:HAS_OBJECT]->(object)
+    MATCH (statement)-[:HAS_PREDICATE]->(predicate:Entity)
    WHERE statement.userId = $userId
      AND statement.invalidAt IS NULL
-    MATCH (subject:Entity)<-[:HAS_SUBJECT]-(statement)-[:HAS_PREDICATE]->(predicate:Entity)
-    MATCH (statement)-[:HAS_OBJECT]->(object:Entity)
-    WHERE subject.uuid = $subjectId
-      AND object.uuid = $objectId
    ${excludePredicateId ? "AND predicate.uuid <> $excludePredicateId" : ""}
    RETURN statement
  `;
-  const params = { subjectId, objectId, userId, ...(excludePredicateId && { excludePredicateId }) };
+  const params = {
+    subjectId,
+    objectId,
+    userId,
+    ...(excludePredicateId && { excludePredicateId }),
+  };
  const result = await runQuery(query, params);
  if (!result || result.length === 0) {
@@ -207,13 +209,12 @@ export async function findSimilarStatements({
  userId: string;
}): Promise<StatementNode[]> {
  const query = `
-    MATCH (statement:Statement)
+    CALL db.index.vector.queryNodes('statement_embedding', $topK, $factEmbedding)
+    YIELD node AS statement, score
    WHERE statement.userId = $userId
      AND statement.invalidAt IS NULL
-      AND statement.factEmbedding IS NOT NULL
+      AND score >= $threshold
    ${excludeIds.length > 0 ? "AND NOT statement.uuid IN $excludeIds" : ""}
-    WITH statement, vector.similarity.cosine($factEmbedding, statement.factEmbedding) AS score
-    WHERE score >= $threshold
    RETURN statement, score
    ORDER BY score DESC
  `;
@@ -223,6 +224,7 @@ export async function findSimilarStatements({
    threshold,
    excludeIds,
    userId,
+    topK: 100,
  });
  if (!result || result.length === 0) {
@@ -396,17 +398,11 @@ export async function searchStatementsByEmbedding(params: {
  minSimilarity?: number;
}) {
  const query = `
-    MATCH (statement:Statement)
+    CALL db.index.vector.queryNodes('statement_embedding', $topK, $embedding)
+    YIELD node AS statement, score
    WHERE statement.userId = $userId
      AND statement.invalidAt IS NULL
-      AND statement.factEmbedding IS NOT NULL
-    WITH statement,
-      CASE
-        WHEN size(statement.factEmbedding) = size($embedding)
-        THEN vector.similarity.cosine($embedding, statement.factEmbedding)
-        ELSE 0
-      END AS score
-    WHERE score >= $minSimilarity
+      AND score >= $minSimilarity
    RETURN statement, score
    ORDER BY score DESC
  `;
@@ -416,6 +412,7 @@ export async function searchStatementsByEmbedding(params: {
    minSimilarity: params.minSimilarity,
    limit: params.limit,
    userId: params.userId,
+    topK: params.limit || 100,
  });
  if (!result || result.length === 0) {
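The rewritten statement lookups anchor on Entity {uuid: ...} before expanding to statements, which pairs with the new entity_uuid index added earlier in this commit. A quick sanity check that the planner actually starts from that index rather than scanning every Statement, as a sketch; PROFILE is standard Cypher, and the placeholder uuid is illustrative:

// Sketch: PROFILE the anchored pattern and inspect the plan for a
// NodeIndexSeek on :Entity(uuid) instead of a NodeByLabelScan.
await runQuery(
  `
  PROFILE
  MATCH (subject:Entity {uuid: $subjectId})<-[:HAS_SUBJECT]-(statement:Statement)
  RETURN count(statement)
  `,
  { subjectId: "00000000-0000-0000-0000-000000000000" },
);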

View File

@@ -112,17 +112,10 @@ export class KnowledgeGraphService {
      sessionContext,
    );
-    console.log("Normalized episode body:", normalizedEpisodeBody);
-    const relatedEpisodesEntities = await getRelatedEpisodesEntities({
-      embedding: await this.getEmbedding(normalizedEpisodeBody),
-      userId: params.userId,
-      minSimilarity: 0.7,
-    });
+    const normalizedTime = Date.now() - startTime;
+    logger.log(`Normalized episode body in ${normalizedTime} ms`);

-    if (
-      normalizedEpisodeBody === "NOTHING_TO_REMEMBER" ||
-      normalizedEpisodeBody === ""
-    ) {
+    if (normalizedEpisodeBody === "NOTHING_TO_REMEMBER") {
      logger.log("Nothing to remember");
      return {
        episodeUuid: null,
@@ -131,6 +124,17 @@ export class KnowledgeGraphService {
      };
    }

+    const relatedEpisodesEntities = await getRelatedEpisodesEntities({
+      embedding: await this.getEmbedding(normalizedEpisodeBody),
+      userId: params.userId,
+      minSimilarity: 0.7,
+    });
+
+    const relatedTime = Date.now();
+    logger.log(
+      `Related episodes entities in ${relatedTime - normalizedTime} ms`,
+    );
+
    // Step 2: Episode Creation - Create or retrieve the episode
    const episode: EpisodicNode = {
      uuid: crypto.randomUUID(),
@@ -153,6 +157,9 @@ export class KnowledgeGraphService {
      previousEpisodes,
    );

+    const extractedTime = Date.now();
+    logger.log(`Extracted entities in ${extractedTime - relatedTime} ms`);
+
    // Step 3.1: Context-aware entity resolution with preset type evolution
    await this.resolveEntitiesWithContext(
      extractedNodes,
@@ -165,15 +172,9 @@ export class KnowledgeGraphService {
      episode,
    );
-    console.log(
-      "Categorized entities:",
-      categorizedEntities.primary.map(
-        (entity) => `primary: ${entity.name} - ${entity.type}`,
-      ),
-      categorizedEntities.expanded.map(
-        (entity) => `expanded: ${entity.name} - ${entity.type}`,
-      ),
-    );
+    const expandedTime = Date.now();
+    logger.log(`Expanded entities in ${expandedTime - extractedTime} ms`);

    // Step 4: Statement Extraction - Extract statements (triples) instead of direct edges
    const extractedStatements = await this.extractStatements(
      episode,
@@ -181,6 +182,11 @@ export class KnowledgeGraphService {
      previousEpisodes,
    );

+    const extractedStatementsTime = Date.now();
+    logger.log(
+      `Extracted statements in ${extractedStatementsTime - expandedTime} ms`,
+    );
+
    // Step 5: Entity Resolution - Resolve extracted nodes to existing nodes or create new ones
    const resolvedTriples = await this.resolveExtractedNodes(
      extractedStatements,
@@ -188,6 +194,11 @@ export class KnowledgeGraphService {
      previousEpisodes,
    );

+    const resolvedTriplesTime = Date.now();
+    logger.log(
+      `Resolved Entities in ${resolvedTriplesTime - extractedStatementsTime} ms`,
+    );
+
    // Step 6: Statement Resolution - Resolve statements and detect contradictions
    const { resolvedStatements, invalidatedStatements } =
      await this.resolveStatements(
@@ -196,12 +207,22 @@ export class KnowledgeGraphService {
        previousEpisodes,
      );

+    const resolvedStatementsTime = Date.now();
+    logger.log(
+      `Resolved statements in ${resolvedStatementsTime - resolvedTriplesTime} ms`,
+    );
+
    // Step 7: Add attributes to entity nodes
    const updatedTriples = await this.addAttributesToEntities(
      resolvedStatements,
      episode,
    );

+    const updatedTriplesTime = Date.now();
+    logger.log(
+      `Updated triples in ${updatedTriplesTime - resolvedStatementsTime} ms`,
+    );
+
    for (const triple of updatedTriples) {
      const { subject, predicate, object, statement, provenance } = triple;
      const safeTriple = {
@@ -228,6 +249,9 @@ export class KnowledgeGraphService {
    // Save triples in parallel for better performance
    await Promise.all(updatedTriples.map((triple) => saveTriple(triple)));

+    const saveTriplesTime = Date.now();
+    logger.log(`Saved triples in ${saveTriplesTime - updatedTriplesTime} ms`);
+
    // Invalidate invalidated statements
    await invalidateStatements({ statementIds: invalidatedStatements });
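One unit mix-up in the timers above: normalizedTime holds a duration (Date.now() - startTime) while every later marker holds an absolute timestamp, so relatedTime - normalizedTime does not measure the related-entities step. A small step-timer sketch that keeps all markers in one unit; makeStepTimer is an illustrative helper, not repo code:

// Sketch: one monotonic step timer; each call logs the time since the
// previous call, so every delta compares like with like.
function makeStepTimer(log: (msg: string) => void) {
  let last = Date.now();
  return (label: string) => {
    const now = Date.now();
    log(`${label} in ${now - last} ms`);
    last = now;
  };
}

// Assumed wiring: const step = makeStepTimer((m) => logger.log(m));
// step("Normalized episode body"); step("Related episodes entities"); ...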

View File

@@ -39,7 +39,7 @@ export class SearchService {
    query: string,
    userId: string,
    options: SearchOptions = {},
-  ): Promise<{ episodes: string[]; facts: string[]; relatedFacts: string[] }> {
+  ): Promise<{ episodes: string[]; facts: { fact: string; validAt: Date }[] }> {
    const startTime = Date.now();
    // Default options
@@ -79,7 +79,6 @@ export class SearchService {
    // // 3. Apply adaptive filtering based on score threshold and minimum count
    const filteredResults = this.applyAdaptiveFiltering(rankedStatements, opts);
-    // const filteredResults = rankedStatements;
    // 3. Return top results
    const episodes = await getEpisodesByStatements(filteredResults);
@@ -97,22 +96,25 @@ export class SearchService {
    // Log recall asynchronously (don't await to avoid blocking response)
    const responseTime = Date.now() - startTime;
-    this.logRecallAsync(
-      query,
-      userId,
-      filteredResults,
-      opts,
-      responseTime,
-    ).catch((error) => {
-      logger.error("Failed to log recall event:", error);
-    });
+    logger.info(`Search completed in ${responseTime}ms for query: ${query}`);
+    // this.logRecallAsync(
+    //   query,
+    //   userId,
+    //   filteredResults,
+    //   opts,
+    //   responseTime,
+    // ).catch((error) => {
+    //   logger.error("Failed to log recall event:", error);
+    // });
    // this.updateRecallCount(userId, episodes, filteredResults);
    return {
      episodes: episodes.map((episode) => episode.content),
-      facts: filteredResults.map((statement) => statement.fact),
-      relatedFacts: relatedFacts.map((fact) => fact.fact),
+      facts: filteredResults.map((statement) => ({
+        fact: statement.fact,
+        validAt: statement.validAt,
+      })),
    };
  }
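Since facts changed shape from string[] to { fact, validAt }[] and relatedFacts was dropped, every caller (including the QA route above) now depends on this structure. A sketch of naming the contract once so call sites stay in sync; SearchResult is an assumed name, not from this repo:

// Sketch: shared result type for SearchService.search() and its callers.
export interface SearchResult {
  episodes: string[];
  facts: { fact: string; validAt: Date }[];
}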

View File

@@ -515,9 +515,6 @@ export async function applyCohereReranking(
    .filter((result) => result.cohereScore > 0.3);

  const responseTime = Date.now() - startTime;
-  console.log(
-    rerankedResults.map((result) => `${result.fact} - ${result.cohereScore}`),
-  );
  logger.info(
    `Cohere reranking completed: ${rerankedResults.length} results returned in ${responseTime}ms`,
  );

View File

@@ -131,23 +131,23 @@
  // 1. Search for similar statements using Neo4j vector search with provenance count
  const cypher = `
-    MATCH (s:Statement)
-    WHERE
-      (s.userId = $userId)
-      ${timeframeCondition}
-      ${spaceCondition}
-    WITH s, vector.similarity.cosine(s.factEmbedding, $embedding) AS score
-    WHERE score > 0.7
+    CALL db.index.vector.queryNodes('statement_embedding', $topk, $embedding)
+    YIELD node AS s, score
+    WHERE s.userId = $userId
+      AND score >= 0.7
+      ${timeframeCondition.replace("AND", "AND").replace("WHERE", "AND")}
+      ${spaceCondition}
    OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s)
    WITH s, score, count(episode) as provenanceCount
    RETURN s, score, provenanceCount
    ORDER BY score DESC
  `;
  const params = {
    embedding: query,
    userId,
    validAt: options.endTime.toISOString(),
+    topk: options.limit || 100,
    ...(options.startTime && { startTime: options.startTime.toISOString() }),
    ...(options.spaceIds.length > 0 && { spaceIds: options.spaceIds }),
  };
@@ -281,15 +281,13 @@ export async function extractEntitiesFromQuery(
  try {
    // Use vector similarity to find relevant entities
    const cypher = `
-      // Match entities using vector similarity on name embeddings
-      MATCH (e:Entity)
-      WHERE e.nameEmbedding IS NOT NULL
-        AND e.userId = $userId
-      WITH e, vector.similarity.cosine(e.nameEmbedding, $embedding) AS score
-      WHERE score > 0.7
+      // Match entities using vector index on name embeddings
+      CALL db.index.vector.queryNodes('entity_embedding', 3, $embedding)
+      YIELD node AS e, score
+      WHERE e.userId = $userId
+        AND score > 0.7
      RETURN e
      ORDER BY score DESC
-      LIMIT 3
    `;
    const params = {
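In the statement query above, timeframeCondition.replace("AND", "AND") is a no-op, and String.replace with a string pattern rewrites only the first occurrence of "WHERE", so the patching only holds if the condition contains at most one WHERE. A sketch of emitting the clause in AND-form for the index path instead of string-patching it; the property names follow the $validAt/$startTime params above, but the exact clause is an assumption:

// Sketch: build the timeframe clause directly in AND-form for queries that
// already opened their WHERE via the vector-index YIELD.
function buildTimeframeCondition(hasStartTime: boolean): string {
  return hasStartTime
    ? "AND s.validAt >= datetime($startTime) AND s.validAt <= datetime($validAt)"
    : "AND s.validAt <= datetime($validAt)";
}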

View File

@@ -1,2 +0,0 @@
BASE_URL=https://core.heysol.ai
API_KEY=

View File

@@ -1,423 +0,0 @@
#!/usr/bin/env node
const fs = require("fs");
const path = require("path");
const axios = require("axios");
/**
* LOCOMO Q&A Evaluation Script
* Evaluates question answering against ingested LOCOMO conversations
* Assumes conversations are already ingested via ingest_conversations.js
*/
class LocomoEvaluator {
constructor(baseUrl = "http://localhost:3033") {
this.baseUrl = baseUrl;
this.headers = {
Authorization: "Bearer rc_pat_kbc76ykt3gd81r6ctyeh8as5jryihbeqqvnsi2wt",
};
this.results = [];
// Create axios instance with default config
this.axios = axios.create({
baseURL: this.baseUrl,
headers: this.headers,
});
}
async makeRequest(endpoint, data) {
try {
const response = await this.axios.post(endpoint, data, {
headers: {
"Content-Type": "application/json",
},
});
return response.data;
} catch (error) {
if (error.response) {
throw new Error(`HTTP ${error.response.status}: ${JSON.stringify(error.response.data)}`);
} else if (error.request) {
throw new Error(`No response received: ${error.message}`);
} else {
throw new Error(`Request error: ${error.message}`);
}
}
}
async searchMemory(question, conversationId = null) {
try {
const response = await this.makeRequest("/api/v1/search", {
query: question,
});
return response;
} catch (error) {
console.error("Search error:", error.message);
return { results: [] };
}
}
async answerQuestion(question) {
try {
const response = await this.makeRequest("/api/v1/qa", {
question: question,
});
return response;
} catch (error) {
console.error("Q&A API error:", error.message);
return {
question: question,
generated_answer: "Error: Could not generate answer",
};
}
}
async evaluateAnswer(question, standardAnswer, generatedAnswer) {
const response = await this.makeRequest("/api/v1/evaluate", {
question,
standard_answer: standardAnswer,
generated_answer: generatedAnswer,
});
return {
label: response.label,
reasoning: response.reasoning,
matchRatio: response.matchRatio,
evaluationMethod: response.method,
};
}
async evaluateQuestion(question, expectedAnswer, evidence, conversationId, category) {
// NEW: Get generated answer from Q&A API
const qaResponse = await this.answerQuestion(question);
const generatedAnswer = qaResponse.generated_answer || "";
// NEW: Evaluate the generated answer against the expected answer
const evaluation = await this.evaluateAnswer(question, expectedAnswer, generatedAnswer);
return {
question,
expectedAnswer,
evidence,
category,
conversationId,
generatedAnswer: generatedAnswer,
evaluationResult: evaluation.label,
evaluationReasoning: evaluation.reasoning,
matchRatio: evaluation.matchRatio,
evaluationMethod: evaluation.evaluationMethod,
};
}
async evaluateConversation(conversation, conversationId) {
console.log(`Evaluating conversation ${conversationId}...`);
const batchSize = 15; // Process 15 questions concurrently
const qaResults = [];
const totalQuestions = conversation.qa.length;
let processed = 0;
console.log(`Processing ${totalQuestions} questions in batches of ${batchSize}...`);
for (let i = 0; i < totalQuestions; i += batchSize) {
const batch = conversation.qa.slice(i, i + batchSize);
const batchStartIndex = i;
console.log(
`Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(totalQuestions / batchSize)} (questions ${i + 1}-${Math.min(i + batchSize, totalQuestions)})`
);
// Create promises for the current batch
const batchPromises = batch.map(async (qa, batchIndex) => {
const questionIndex = batchStartIndex + batchIndex;
console.log(qa.question);
try {
const result = await this.evaluateQuestion(
qa.question,
qa.answer,
qa.evidence,
conversationId,
qa.category
);
return { result, index: questionIndex };
} catch (error) {
console.error(`Error evaluating question ${questionIndex + 1}:`, error.message);
return { error: error.message, index: questionIndex, qa };
}
});
// Process batch concurrently
const batchResults = await Promise.allSettled(batchPromises);
// Process results from this batch
batchResults.forEach((promiseResult) => {
if (promiseResult.status === "fulfilled") {
const { result, error, index, qa } = promiseResult.value;
if (result) {
qaResults.push(result);
} else if (error) {
// Add a placeholder result for failed evaluations
qaResults.push({
question: qa.question,
expectedAnswer: qa.answer ? qa.answer.toString() : qa.adversarial_answer.toString(),
evidence: qa.evidence,
category: qa.category,
conversationId,
error: error,
generatedAnswer: "Error: Evaluation failed",
evaluationResult: "ERROR",
evaluationReasoning: `Evaluation failed: ${error}`,
matchRatio: 0,
evaluationMethod: "error",
});
}
} else {
console.error(`Batch promise rejected:`, promiseResult.reason);
}
});
processed += batch.length;
console.log(` Completed ${processed}/${totalQuestions} questions`);
// Save results periodically (every batch or ~15 questions)
console.log(`Saving intermediate results...`);
this.saveResults();
// break;
}
console.log(`Completed evaluation of ${totalQuestions} questions`);
return qaResults;
}
async runEvaluation() {
console.log("Starting LOCOMO Q&A evaluation...");
// Load LOCOMO dataset
const dataPath = path.join(__dirname, "locomo10.json");
const conversations = JSON.parse(fs.readFileSync(dataPath, "utf8"));
console.log(`Loaded ${conversations.length} conversations for evaluation`);
// Evaluate each conversation
for (let i = 0; i < conversations.length; i++) {
const conversation = conversations[i];
const conversationId = `locomo_${i + 1}`;
if (i === 0) {
try {
const results = await this.evaluateConversation(conversation, conversationId);
this.results.push({
conversationId,
results,
totalQuestions: conversation.qa.length,
});
} catch (error) {
console.error(`Error evaluating conversation ${conversationId}:`, error.message);
}
}
}
// Save and summarize results
this.saveResults();
this.printDetailedSummary();
}
saveResults() {
const resultsPath = path.join(__dirname, "evaluation_results.json");
const timestamp = new Date().toISOString();
const output = {
timestamp,
summary: this.calculateSummaryStats(),
conversations: this.results,
};
fs.writeFileSync(resultsPath, JSON.stringify(output, null, 2));
console.log(`\nResults saved to ${resultsPath}`);
}
calculateSummaryStats() {
const totalQuestions = this.results.reduce((sum, conv) => sum + conv.totalQuestions, 0);
const questionsWithContext = this.results.reduce(
(sum, conv) => sum + conv.results.filter((r) => r.hasContext).length,
0
);
const questionsWithAnswerInContext = this.results.reduce(
(sum, conv) => sum + conv.results.filter((r) => r.answerInContext).length,
0
);
// NEW: Q&A evaluation statistics
const questionsWithGeneratedAnswers = this.results.reduce(
(sum, conv) =>
sum +
conv.results.filter(
(r) => r.generatedAnswer && r.generatedAnswer !== "Error: Could not generate answer"
).length,
0
);
const correctAnswers = this.results.reduce(
(sum, conv) => sum + conv.results.filter((r) => r.evaluationResult === "CORRECT").length,
0
);
const wrongAnswers = this.results.reduce(
(sum, conv) => sum + conv.results.filter((r) => r.evaluationResult === "WRONG").length,
0
);
const errorAnswers = this.results.reduce(
(sum, conv) => sum + conv.results.filter((r) => r.evaluationResult === "ERROR").length,
0
);
// Category breakdown
const categoryStats = {};
this.results.forEach((conv) => {
conv.results.forEach((result) => {
const cat = result.category || "unknown";
if (!categoryStats[cat]) {
categoryStats[cat] = {
total: 0,
withContext: 0,
withAnswer: 0,
withGenerated: 0,
correct: 0,
wrong: 0,
errors: 0,
};
}
categoryStats[cat].total++;
if (result.hasContext) categoryStats[cat].withContext++;
if (result.answerInContext) categoryStats[cat].withAnswer++;
if (
result.generatedAnswer &&
result.generatedAnswer !== "Error: Could not generate answer" &&
result.generatedAnswer !== "Error: Evaluation failed"
) {
categoryStats[cat].withGenerated++;
}
if (result.evaluationResult === "CORRECT") categoryStats[cat].correct++;
if (result.evaluationResult === "WRONG") categoryStats[cat].wrong++;
if (result.evaluationResult === "ERROR") categoryStats[cat].errors++;
});
});
return {
totalQuestions,
questionsWithContext,
questionsWithAnswerInContext,
contextRetrievalRate: ((questionsWithContext / totalQuestions) * 100).toFixed(1),
answerFoundRate: ((questionsWithAnswerInContext / totalQuestions) * 100).toFixed(1),
// NEW: Q&A evaluation metrics
questionsWithGeneratedAnswers,
correctAnswers,
wrongAnswers,
errorAnswers,
qaSuccessRate:
totalQuestions > 0
? ((questionsWithGeneratedAnswers / totalQuestions) * 100).toFixed(1)
: "0.0",
answerAccuracyRate:
questionsWithGeneratedAnswers > 0
? ((correctAnswers / questionsWithGeneratedAnswers) * 100).toFixed(1)
: "0.0",
categoryBreakdown: categoryStats,
};
}
printDetailedSummary() {
const stats = this.calculateSummaryStats();
console.log("\n=== LOCOMO EVALUATION RESULTS ===");
console.log(`Total conversations: ${this.results.length}`);
console.log(`Total questions: ${stats.totalQuestions}`);
console.log(
`Questions with retrieved context: ${stats.questionsWithContext}/${stats.totalQuestions} (${stats.contextRetrievalRate}%)`
);
console.log(
`Questions with answer in context: ${stats.questionsWithAnswerInContext}/${stats.totalQuestions} (${stats.answerFoundRate}%)`
);
console.log("\n=== Q&A EVALUATION RESULTS ===");
console.log(
`Questions with generated answers: ${stats.questionsWithGeneratedAnswers}/${stats.totalQuestions} (${stats.qaSuccessRate}%)`
);
console.log(
`Correct answers: ${stats.correctAnswers}/${stats.questionsWithGeneratedAnswers} (${stats.answerAccuracyRate}%)`
);
console.log(`Wrong answers: ${stats.wrongAnswers}/${stats.questionsWithGeneratedAnswers}`);
if (stats.errorAnswers > 0) {
console.log(`Evaluation errors: ${stats.errorAnswers}/${stats.totalQuestions}`);
}
console.log("\n=== CATEGORY BREAKDOWN ===");
Object.entries(stats.categoryBreakdown).forEach(([category, catStats]) => {
const retrievalRate = ((catStats.withAnswer / catStats.total) * 100).toFixed(1);
const qaRate =
catStats.withGenerated > 0
? ((catStats.withGenerated / catStats.total) * 100).toFixed(1)
: "0.0";
const accuracyRate =
catStats.withGenerated > 0
? ((catStats.correct / catStats.withGenerated) * 100).toFixed(1)
: "0.0";
console.log(`Category ${category}:`);
console.log(` Total questions: ${catStats.total}`);
console.log(
` Context retrieval: ${catStats.withAnswer}/${catStats.total} (${retrievalRate}%)`
);
console.log(` Generated answers: ${catStats.withGenerated}/${catStats.total} (${qaRate}%)`);
console.log(
` Answer accuracy: ${catStats.correct}/${catStats.withGenerated} (${accuracyRate}%)`
);
if (catStats.errors > 0) {
console.log(` Evaluation errors: ${catStats.errors}/${catStats.total}`);
}
});
console.log("\n=== PERFORMANCE INSIGHTS ===");
const avgContextLength =
this.results.reduce(
(sum, conv) => sum + conv.results.reduce((s, r) => s + r.contextLength, 0),
0
) / stats.totalQuestions;
console.log(`Average context length: ${avgContextLength.toFixed(0)} characters`);
const avgMatchRatio =
this.results.reduce(
(sum, conv) => sum + conv.results.reduce((s, r) => s + (r.matchRatio || 0), 0),
0
) / stats.totalQuestions;
console.log(`Average answer match ratio: ${avgMatchRatio.toFixed(3)}`);
// Show evaluation method breakdown
const evaluationMethods = {};
this.results.forEach((conv) => {
conv.results.forEach((result) => {
const method = result.evaluationMethod || "unknown";
evaluationMethods[method] = (evaluationMethods[method] || 0) + 1;
});
});
console.log("\n=== EVALUATION SUMMARY ===");
console.log(
"This evaluation measures both retrieval performance and answer generation accuracy."
);
console.log("Generated answers are evaluated against gold standard answers.");
console.log("\n=== EVALUATION METHODS USED ===");
Object.entries(evaluationMethods).forEach(([method, count]) => {
const percentage = ((count / stats.totalQuestions) * 100).toFixed(1);
console.log(`${method}: ${count}/${stats.totalQuestions} (${percentage}%)`);
});
}
}
// Command line interface
if (require.main === module) {
const evaluator = new LocomoEvaluator();
evaluator.runEvaluation().catch(console.error);
}
module.exports = LocomoEvaluator;

File diff suppressed because it is too large

View File

@@ -1,304 +0,0 @@
#!/usr/bin/env node
const fs = require("fs");
const path = require("path");
const axios = require("axios");
/**
* LOCOMO Conversation Ingestion Script
* Ingests LOCOMO conversations into C.O.R.E memory system
* Tracks ingestion status to avoid duplicates
*/
class LocomoIngester {
constructor() {
this.baseUrl = "http://3.95.55.23:3033";
this.headers = {
Authorization: `Bearer rc_pat_kbc76ykt3gd81r6ctyeh8as5jryihbeqqvnsi2wt`,
};
this.statusFile = path.join(__dirname, "ingestion_status.json");
// Create axios instance with default config
this.axios = axios.create({
baseURL: this.baseUrl,
headers: this.headers,
timeout: 10000, // 10 second timeout
});
}
async makeRequest(endpoint, data) {
try {
const response = await this.axios.post(endpoint, data, {
headers: {
"Content-Type": "application/json",
},
});
return response.data;
} catch (error) {
if (error.response) {
// Server responded with error status
throw new Error(`HTTP ${error.response.status}: ${JSON.stringify(error.response.data)}`);
} else if (error.request) {
// Request was made but no response received
throw new Error(`No response received: ${error.message}`);
} else {
// Something else happened
throw new Error(`Request error: ${error.message}`);
}
}
}
loadIngestionStatus() {
try {
if (fs.existsSync(this.statusFile)) {
return JSON.parse(fs.readFileSync(this.statusFile, "utf8"));
}
} catch (error) {
console.warn("Could not load ingestion status:", error.message);
}
return { conversations: {}, timestamp: null };
}
saveIngestionStatus(status) {
fs.writeFileSync(this.statusFile, JSON.stringify(status, null, 2));
}
async ingestConversation(conversation, conversationId, forceReingest = false) {
const status = this.loadIngestionStatus();
const sessionId =
Math.random().toString(36).substring(2, 15) + Math.random().toString(36).substring(2, 15);
if (status.conversations[conversationId] && !forceReingest) {
console.log(`Conversation ${conversationId} already ingested, skipping...`);
return false;
}
console.log(`BASE URL: ${this.baseUrl} ${process.env.BASE_URL}`);
console.log(`Ingesting conversation ${conversationId}...`);
const episodes = this.formatConversationForIngestion(conversation, conversationId);
let successCount = 0;
let errorCount = 0;
for (const [index, episode] of episodes.entries()) {
// if (index >= 0 && index < 20) {
try {
const payload = {
episodeBody: episode.content,
referenceTime: episode.metadata.timestamp,
source: "locomo_benchmark",
sessionId: `${sessionId}-${episode.metadata.sessionNumber}`,
};
await this.makeRequest("/api/v1/add", payload);
successCount++;
// Progress indicator
if ((index + 1) % 10 === 0) {
console.log(` Ingested ${index + 1}/${episodes.length} episodes`);
}
// Small delay to avoid overwhelming the system
await new Promise((resolve) => setTimeout(resolve, 50));
} catch (error) {
console.error(` Error ingesting episode ${index}:`, error.message);
errorCount++;
}
// }
}
// Update status
status.conversations[conversationId] = {
ingested: true,
timestamp: new Date().toISOString(),
totalEpisodes: episodes.length,
successCount,
errorCount,
};
status.timestamp = new Date().toISOString();
this.saveIngestionStatus(status);
console.log(` Completed: ${successCount} success, ${errorCount} errors`);
return true;
}
formatConversationForIngestion(conversation, conversationId) {
const episodes = [];
const conv = conversation.conversation;
// Extract speaker names
const speakerA = conv.speaker_a;
const speakerB = conv.speaker_b;
// Process each session
Object.keys(conv).forEach((key) => {
if (
key.startsWith("session_") &&
!key.endsWith("_date_time")
// ["session_1"].includes(key)
) {
console.log(`Processing session ${key}`);
const sessionNumber = key.replace("session_", "");
const sessionData = conv[key];
const sessionDateTime = conv[`session_${sessionNumber}_date_time`];
if (Array.isArray(sessionData)) {
sessionData.forEach((dialog, dialogIndex) => {
episodes.push({
content: `${dialog.speaker}: ${dialog.blip_caption ? `Shared ${dialog.blip_caption}. ${dialog.query}.` : ""} ${dialog.text}`,
metadata: {
conversationId,
sessionNumber: parseInt(sessionNumber),
dialogIndex,
dialogId: dialog.dia_id,
timestamp: sessionDateTime
? new Date(
Date.parse(
sessionDateTime.replace(
/(\d+):(\d+) (am|pm) on (\d+) (\w+), (\d+)/,
(_, hours, minutes, ampm, day, month, year) => {
const monthMap = {
January: 1,
Jan: 1,
February: 2,
Feb: 2,
March: 3,
Mar: 3,
April: 4,
Apr: 4,
May: 5,
June: 6,
Jun: 6,
July: 7,
Jul: 7,
August: 8,
Aug: 8,
September: 9,
Sep: 9,
October: 10,
Oct: 10,
November: 11,
Nov: 11,
December: 12,
Dec: 12,
};
const monthNum = monthMap[month] || 1;
return `${year}-${monthNum.toString().padStart(2, "0")}-${day.padStart(2, "0")} ${hours}:${minutes} ${ampm}`;
}
)
)
).toISOString()
: null,
speaker: dialog.speaker,
speakerA,
speakerB,
source: "locomo_benchmark",
},
});
});
}
}
});
return episodes;
}
async ingestAll(forceReingest = false) {
console.log("Starting LOCOMO conversation ingestion...");
if (forceReingest) {
console.log("Force re-ingestion enabled - will overwrite existing data");
}
// Load LOCOMO dataset
const dataPath = path.join(__dirname, "locomo10.json");
const conversations = JSON.parse(fs.readFileSync(dataPath, "utf8"));
console.log(`Loaded ${conversations.length} conversations`);
let ingestedCount = 0;
let skippedCount = 0;
// Ingest each conversation
for (let i = 0; i < conversations.length; i++) {
if (i === 4) {
const conversation = conversations[i];
const conversationId = `locomo_${i + 1}`;
try {
const wasIngested = await this.ingestConversation(
conversation,
conversationId,
forceReingest
);
if (wasIngested) {
ingestedCount++;
} else {
skippedCount++;
}
} catch (error) {
console.error(`Error with conversation ${conversationId}:`, error.message);
}
}
}
this.printSummary(ingestedCount, skippedCount);
}
printSummary(ingestedCount, skippedCount) {
console.log("\n=== INGESTION SUMMARY ===");
console.log(`Conversations ingested: ${ingestedCount}`);
console.log(`Conversations skipped: ${skippedCount}`);
console.log(`Status file: ${this.statusFile}`);
const status = this.loadIngestionStatus();
const totalEpisodes = Object.values(status.conversations).reduce(
(sum, conv) => sum + (conv.totalEpisodes || 0),
0
);
const totalSuccess = Object.values(status.conversations).reduce(
(sum, conv) => sum + (conv.successCount || 0),
0
);
console.log(`Total episodes ingested: ${totalSuccess}/${totalEpisodes}`);
console.log("\nReady for evaluation phase!");
}
getStatus() {
const status = this.loadIngestionStatus();
const conversations = Object.keys(status.conversations).length;
const totalEpisodes = Object.values(status.conversations).reduce(
(sum, conv) => sum + (conv.successCount || 0),
0
);
return {
conversations,
episodes: totalEpisodes,
lastIngestion: status.timestamp,
};
}
}
// Command line interface
if (require.main === module) {
const args = process.argv.slice(2);
const forceReingest = args.includes("--force");
const showStatus = args.includes("--status");
const ingester = new LocomoIngester();
if (showStatus) {
const status = ingester.getStatus();
console.log("LOCOMO Ingestion Status:");
console.log(` Conversations: ${status.conversations}`);
console.log(` Episodes: ${status.episodes}`);
console.log(` Last ingestion: ${status.lastIngestion || "Never"}`);
} else {
ingester.ingestAll(forceReingest).catch(console.error);
}
}
module.exports = LocomoIngester;

View File

@@ -1,265 +0,0 @@
#!/usr/bin/env node
const fs = require("fs");
const path = require("path");
const axios = require("axios");
/**
* LOCOMO Session Summary Ingestion Script
* Ingests LOCOMO session summaries - comprehensive and available for all conversations
* More efficient than full conversations while preserving all key information
*/
class LocomoSessionIngester {
constructor(baseUrl = process.env.BASE_URL) {
this.baseUrl = baseUrl;
this.headers = {
Authorization: `Bearer ${process.env.API_KEY}`,
};
this.statusFile = path.join(__dirname, "session_ingestion_status.json");
// Create axios instance with default config
this.axios = axios.create({
baseURL: this.baseUrl,
headers: this.headers,
timeout: 10000,
});
}
async makeRequest(endpoint, data) {
try {
const response = await this.axios.post(endpoint, data, {
headers: {
"Content-Type": "application/json",
},
});
return response.data;
} catch (error) {
if (error.response) {
throw new Error(`HTTP ${error.response.status}: ${JSON.stringify(error.response.data)}`);
} else if (error.request) {
throw new Error(`No response received: ${error.message}`);
} else {
throw new Error(`Request error: ${error.message}`);
}
}
}
loadIngestionStatus() {
try {
if (fs.existsSync(this.statusFile)) {
return JSON.parse(fs.readFileSync(this.statusFile, "utf8"));
}
} catch (error) {
console.warn("Could not load ingestion status:", error.message);
}
return { conversations: {}, timestamp: null };
}
saveIngestionStatus(status) {
fs.writeFileSync(this.statusFile, JSON.stringify(status, null, 2));
}
formatSessionSummaryForIngestion(conversation, conversationId) {
const episodes = [];
const sessionSummary = conversation.session_summary;
const conv = conversation.conversation;
const speakerA = conv.speaker_a;
const speakerB = conv.speaker_b;
// Process each session summary
Object.entries(sessionSummary).forEach(([sessionKey, summary]) => {
const sessionNumber = sessionKey.replace("session_", "").replace("_summary", "");
episodes.push({
content: `Session ${sessionNumber} Summary: ${summary}`,
metadata: {
conversationId,
sessionNumber: parseInt(sessionNumber),
speakerA,
speakerB,
source: "locomo_sessions",
type: "session_summary",
},
});
});
return episodes;
}
async ingestConversation(conversation, conversationId, forceReingest = false) {
const status = this.loadIngestionStatus();
if (status.conversations[conversationId] && !forceReingest) {
console.log(`Conversation ${conversationId} already ingested, skipping...`);
return false;
}
console.log(`Ingesting session summaries for conversation ${conversationId}...`);
const episodes = this.formatSessionSummaryForIngestion(conversation, conversationId);
let successCount = 0;
let errorCount = 0;
console.log(` Total sessions to ingest: ${episodes.length}`);
for (const [index, episode] of episodes.entries()) {
try {
const payload = {
episodeBody: episode.content,
referenceTime: new Date(Date.now() + index * 1000).toISOString(),
source: "locomo_sessions",
};
await this.makeRequest("/api/v1/add", payload);
successCount++;
// Progress indicator
if ((index + 1) % 10 === 0) {
console.log(` Ingested ${index + 1}/${episodes.length} sessions`);
}
// Small delay
await new Promise((resolve) => setTimeout(resolve, 100));
} catch (error) {
console.error(` Error ingesting session ${index}:`, error.message);
errorCount++;
}
}
// Update status
status.conversations[conversationId] = {
ingested: true,
timestamp: new Date().toISOString(),
totalEpisodes: episodes.length,
successCount,
errorCount,
};
status.timestamp = new Date().toISOString();
this.saveIngestionStatus(status);
console.log(` Completed: ${successCount} success, ${errorCount} errors`);
return true;
}
async ingestAll(forceReingest = false) {
console.log("Starting LOCOMO session summary ingestion...");
if (forceReingest) {
console.log("Force re-ingestion enabled");
}
// Load LOCOMO dataset
const dataPath = path.join(__dirname, "data", "locomo10.json");
const conversations = JSON.parse(fs.readFileSync(dataPath, "utf8"));
console.log(`Loaded ${conversations.length} conversations`);
let ingestedCount = 0;
let skippedCount = 0;
// Test connection first
try {
console.log("Testing connection...");
await this.makeRequest("/api/v1/add", {
episodeBody: "Session ingestion test",
referenceTime: new Date().toISOString(),
source: "test",
});
console.log("Connection test successful");
} catch (error) {
console.error("Connection test failed:", error.message);
return;
}
// Ingest all conversations
for (let i = 0; i < conversations.length; i++) {
const conversation = conversations[i];
const conversationId = `locomo_sessions_${i + 1}`;
if (i === 0) {
try {
const wasIngested = await this.ingestConversation(
conversation,
conversationId,
forceReingest
);
if (wasIngested) {
ingestedCount++;
} else {
skippedCount++;
}
} catch (error) {
console.error(`Error with conversation ${conversationId}:`, error.message);
}
}
}
this.printSummary(ingestedCount, skippedCount);
}
printSummary(ingestedCount, skippedCount) {
console.log("\n=== SESSION SUMMARY INGESTION ===");
console.log(`Conversations processed: ${ingestedCount}`);
console.log(`Conversations skipped: ${skippedCount}`);
const status = this.loadIngestionStatus();
const totalSessions = Object.values(status.conversations).reduce(
(sum, conv) => sum + (conv.totalEpisodes || 0),
0
);
const totalSuccess = Object.values(status.conversations).reduce(
(sum, conv) => sum + (conv.successCount || 0),
0
);
const totalErrors = Object.values(status.conversations).reduce(
(sum, conv) => sum + (conv.errorCount || 0),
0
);
console.log(`Total sessions ingested: ${totalSuccess}/${totalSessions}`);
console.log(
`Success rate: ${((totalSuccess / (totalSuccess + totalErrors || 1)) * 100).toFixed(1)}%`
);
console.log("\nReady for evaluation phase!");
console.log("Benefits: Fast ingestion, comprehensive summaries, all conversations covered");
}
getStatus() {
const status = this.loadIngestionStatus();
const conversations = Object.keys(status.conversations).length;
const totalSessions = Object.values(status.conversations).reduce(
(sum, conv) => sum + (conv.successCount || 0),
0
);
return {
conversations,
sessions: totalSessions,
lastIngestion: status.timestamp,
};
}
}
// Command line interface
if (require.main === module) {
const args = process.argv.slice(2);
const forceReingest = args.includes("--force");
const showStatus = args.includes("--status");
const ingester = new LocomoSessionIngester();
if (showStatus) {
const status = ingester.getStatus();
console.log("LOCOMO Session Ingestion Status:");
console.log(` Conversations: ${status.conversations}`);
console.log(` Sessions: ${status.sessions}`);
console.log(` Last ingestion: ${status.lastIngestion || "Never"}`);
} else {
ingester.ingestAll(forceReingest).catch(console.error);
}
}
module.exports = LocomoSessionIngester;

View File

@@ -1,40 +0,0 @@
{
"conversations": {
"locomo_1": {
"ingested": true,
"timestamp": "2025-08-22T14:47:13.572Z",
"totalEpisodes": 419,
"successCount": 419,
"errorCount": 0
},
"locomo_2": {
"ingested": true,
"timestamp": "2025-08-22T14:50:26.625Z",
"totalEpisodes": 369,
"successCount": 369,
"errorCount": 0
},
"locomo_3": {
"ingested": true,
"timestamp": "2025-08-22T14:55:30.262Z",
"totalEpisodes": 663,
"successCount": 663,
"errorCount": 0
},
"locomo_4": {
"ingested": true,
"timestamp": "2025-08-22T15:00:02.753Z",
"totalEpisodes": 629,
"successCount": 629,
"errorCount": 0
},
"locomo_5": {
"ingested": true,
"timestamp": "2025-08-22T15:04:17.340Z",
"totalEpisodes": 680,
"successCount": 680,
"errorCount": 0
}
},
"timestamp": "2025-08-22T15:04:17.340Z"
}

File diff suppressed because one or more lines are too long

View File

@@ -1,295 +0,0 @@
{
"name": "locomo",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "locomo",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"axios": "^1.11.0"
}
},
"node_modules/asynckit": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
"license": "MIT"
},
"node_modules/axios": {
"version": "1.11.0",
"resolved": "https://registry.npmjs.org/axios/-/axios-1.11.0.tgz",
"integrity": "sha512-1Lx3WLFQWm3ooKDYZD1eXmoGO9fxYQjrycfHFC8P0sCfQVXyROp0p9PFWBehewBOdCwHc+f/b8I0fMto5eSfwA==",
"license": "MIT",
"dependencies": {
"follow-redirects": "^1.15.6",
"form-data": "^4.0.4",
"proxy-from-env": "^1.1.0"
}
},
"node_modules/call-bind-apply-helpers": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
"integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"function-bind": "^1.1.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"license": "MIT",
"dependencies": {
"delayed-stream": "~1.0.0"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/delayed-stream": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"license": "MIT",
"engines": {
"node": ">=0.4.0"
}
},
"node_modules/dunder-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
"integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.1",
"es-errors": "^1.3.0",
"gopd": "^1.2.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-define-property": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
"integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-errors": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
"integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-object-atoms": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
"integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-set-tostringtag": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
"integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"get-intrinsic": "^1.2.6",
"has-tostringtag": "^1.0.2",
"hasown": "^2.0.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/follow-redirects": {
"version": "1.15.11",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz",
"integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==",
"funding": [
{
"type": "individual",
"url": "https://github.com/sponsors/RubenVerborgh"
}
],
"license": "MIT",
"engines": {
"node": ">=4.0"
},
"peerDependenciesMeta": {
"debug": {
"optional": true
}
}
},
"node_modules/form-data": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz",
"integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==",
"license": "MIT",
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.8",
"es-set-tostringtag": "^2.1.0",
"hasown": "^2.0.2",
"mime-types": "^2.1.12"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/function-bind": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
"integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/get-intrinsic": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
"integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.2",
"es-define-property": "^1.0.1",
"es-errors": "^1.3.0",
"es-object-atoms": "^1.1.1",
"function-bind": "^1.1.2",
"get-proto": "^1.0.1",
"gopd": "^1.2.0",
"has-symbols": "^1.1.0",
"hasown": "^2.0.2",
"math-intrinsics": "^1.1.0"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/get-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
"integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
"license": "MIT",
"dependencies": {
"dunder-proto": "^1.0.1",
"es-object-atoms": "^1.0.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/gopd": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
"integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/has-symbols": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
"integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/has-tostringtag": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
"integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
"license": "MIT",
"dependencies": {
"has-symbols": "^1.0.3"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/hasown": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
"license": "MIT",
"dependencies": {
"function-bind": "^1.1.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/math-intrinsics": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
"integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/mime-db": {
"version": "1.52.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
"license": "MIT",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/mime-types": {
"version": "2.1.35",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
"license": "MIT",
"dependencies": {
"mime-db": "1.52.0"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/proxy-from-env": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
"license": "MIT"
}
}
}

View File

@@ -1,23 +0,0 @@
{
"name": "locomo",
"version": "1.0.0",
"description": "**Authors**: [Adyasha Maharana](https://adymaharana.github.io/), [Dong-Ho Lee](https://www.danny-lee.info/), [Sergey Tulyakov](https://stulyakov.com/), [Mohit Bansal](https://www.cs.unc.edu/~mbansal/), [Francesco Barbieri](https://fvancesco.github.io/) and [Yuwei Fang](https://yuwfan.github.io/)",
"main": "debug_auth.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "git+https://github.com/snap-research/locomo.git"
},
"keywords": [],
"author": "",
"license": "ISC",
"bugs": {
"url": "https://github.com/snap-research/locomo/issues"
},
"homepage": "https://github.com/snap-research/locomo#readme",
"dependencies": {
"axios": "^1.11.0"
}
}