Mirror of https://github.com/eliasstepanik/core.git (synced 2026-01-10 23:48:26 +00:00)
Feat: Delete episode nodes
Enhance knowledge graphs with implicit relationships:

- Added a new API route for deleting episodes, including related statements and entities.
- Introduced error handling for unauthorized access and non-existent episodes.
- Enhanced the KnowledgeGraphService with methods for resolving entities and managing relationships during deletions.
- Updated entity and episode models to support the new deletion logic and ensure data integrity.
This commit is contained in:
parent 50c4e2bcce
commit 28803bec17
apps/webapp/app/routes/api.v1.episode.delete.tsx (new file, 60 lines added)
@@ -0,0 +1,60 @@
import { z } from "zod";
import { json } from "@remix-run/node";
import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server";
import { deleteEpisodeWithRelatedNodes } from "~/services/graphModels/episode";

export const DeleteEpisodeBodyRequest = z.object({
  episodeUuid: z.string().uuid("Episode UUID must be a valid UUID"),
});

const { action, loader } = createActionApiRoute(
  {
    body: DeleteEpisodeBodyRequest,
    allowJWT: true,
    method: "DELETE",
    authorization: {
      action: "delete",
    },
    corsStrategy: "all",
  },
  async ({ body, authentication }) => {
    try {
      const result = await deleteEpisodeWithRelatedNodes({
        episodeUuid: body.episodeUuid,
        userId: authentication.userId,
      });

      if (!result.episodeDeleted) {
        return json(
          {
            error: "Episode not found or unauthorized",
            code: "not_found"
          },
          { status: 404 }
        );
      }

      return json({
        success: true,
        message: "Episode deleted successfully",
        deleted: {
          episode: result.episodeDeleted,
          statements: result.statementsDeleted,
          entities: result.entitiesDeleted,
          facts: result.factsDeleted,
        },
      });
    } catch (error) {
      console.error("Error deleting episode:", error);
      return json(
        {
          error: "Failed to delete episode",
          code: "internal_error"
        },
        { status: 500 }
      );
    }
  },
);

export { action, loader };
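For reference, a minimal client-side sketch of calling this route. The URL assumes Remix's flat-route convention maps api.v1.episode.delete.tsx to /api/v1/episode/delete, and the bearer-token header is an assumption about how createActionApiRoute authenticates requests; adjust both to the actual deployment.

// Hypothetical client call; route path and auth header are assumptions, not taken from this commit.
async function deleteEpisodeViaApi(
  baseUrl: string,
  apiToken: string,
  episodeUuid: string,
) {
  const response = await fetch(`${baseUrl}/api/v1/episode/delete`, {
    method: "DELETE",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiToken}`,
    },
    body: JSON.stringify({ episodeUuid }),
  });
  if (!response.ok) {
    // 404: not found or unauthorized; 500: internal error (see the handler above)
    throw new Error(`Episode delete failed with status ${response.status}`);
  }
  // { success, message, deleted: { episode, statements, entities, facts } }
  return response.json();
}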
@@ -2,6 +2,19 @@ import type { EntityNode } from "@core/types";
import { runQuery } from "~/lib/neo4j.server";

export async function saveEntity(entity: EntityNode): Promise<string> {
  // Debug: Log entity to identify missing typeEmbedding
  if (!entity.typeEmbedding) {
    console.error(`Entity missing typeEmbedding:`, {
      uuid: entity.uuid,
      name: entity.name,
      type: entity.type,
      hasNameEmbedding: !!entity.nameEmbedding,
    });
    throw new Error(
      `Entity ${entity.name} (${entity.type}) is missing typeEmbedding`,
    );
  }

  const query = `
    MERGE (n:Entity {uuid: $uuid})
    ON CREATE SET
@@ -9,6 +22,7 @@ export async function saveEntity(entity: EntityNode): Promise<string> {
      n.type = $type,
      n.attributes = $attributes,
      n.nameEmbedding = $nameEmbedding,
      n.typeEmbedding = $typeEmbedding,
      n.createdAt = $createdAt,
      n.userId = $userId,
      n.space = $space
@@ -17,6 +31,7 @@ export async function saveEntity(entity: EntityNode): Promise<string> {
      n.type = $type,
      n.attributes = $attributes,
      n.nameEmbedding = $nameEmbedding,
      n.typeEmbedding = $typeEmbedding,
      n.space = $space
    RETURN n.uuid as uuid
  `;
@@ -27,6 +42,7 @@ export async function saveEntity(entity: EntityNode): Promise<string> {
    type: entity.type,
    attributes: JSON.stringify(entity.attributes || {}),
    nameEmbedding: entity.nameEmbedding,
    typeEmbedding: entity.typeEmbedding,
    createdAt: entity.createdAt.toISOString(),
    userId: entity.userId,
    space: entity.space || null,
@@ -52,6 +68,7 @@ export async function getEntity(uuid: string): Promise<EntityNode | null> {
    type: entity.type,
    attributes: JSON.parse(entity.attributes || "{}"),
    nameEmbedding: entity.nameEmbedding,
    typeEmbedding: entity.typeEmbedding,
    createdAt: new Date(entity.createdAt),
    userId: entity.userId,
    space: entity.space,
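Since saveEntity now rejects entities without a typeEmbedding, callers have to populate both embeddings before saving. A minimal sketch, assuming getEmbedding from ~/lib/model.server resolves to a number[] (as it is used elsewhere in this commit), that the entity model lives alongside the episode model, and using illustrative name/type values:

import { getEmbedding } from "~/lib/model.server";
import { saveEntity } from "~/services/graphModels/entity";

// Sketch only: name/type values are illustrative.
async function saveExampleEntity(userId: string) {
  const name = "Tesla";
  const type = "Company";
  return saveEntity({
    uuid: crypto.randomUUID(),
    name,
    type,
    attributes: {},
    nameEmbedding: await getEmbedding(name), // embeds the bare name
    typeEmbedding: await getEmbedding(type), // required by the new guard above
    createdAt: new Date(),
    userId,
  });
}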
@@ -85,6 +102,43 @@ export async function findSimilarEntities(params: {
      type: entity.type,
      attributes: JSON.parse(entity.attributes || "{}"),
      nameEmbedding: entity.nameEmbedding,
      typeEmbedding: entity.typeEmbedding,
      createdAt: new Date(entity.createdAt),
      userId: entity.userId,
      space: entity.space,
    };
  });
}

export async function findSimilarEntitiesWithSameType(params: {
  queryEmbedding: number[];
  entityType: string;
  limit: number;
  threshold: number;
  userId: string;
}): Promise<EntityNode[]> {
  const query = `
    MATCH (entity:Entity)
    WHERE entity.nameEmbedding IS NOT NULL
    WITH entity, vector.similarity.cosine($queryEmbedding, entity.nameEmbedding) AS score
    WHERE score >= $threshold
      AND entity.userId = $userId
      AND entity.type = $entityType
    RETURN entity, score
    ORDER BY score DESC
    LIMIT toInteger($limit)
  `;

  const result = await runQuery(query, params);
  return result.map((record) => {
    const entity = record.get("entity").properties;

    return {
      uuid: entity.uuid,
      name: entity.name,
      type: entity.type,
      attributes: JSON.parse(entity.attributes || "{}"),
      nameEmbedding: entity.nameEmbedding,
      typeEmbedding: entity.typeEmbedding,
      createdAt: new Date(entity.createdAt),
      userId: entity.userId,
      space: entity.space,
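The new findSimilarEntitiesWithSameType helper narrows vector search to candidates that share the entity's type, which is what the deduplication step in KnowledgeGraphService switches to later in this commit. A hedged usage sketch (threshold and limit mirror the values used there):

import { findSimilarEntitiesWithSameType } from "~/services/graphModels/entity";
import type { EntityNode } from "@core/types";

// Returns same-type duplicate candidates for an extracted entity.
async function findDuplicateCandidates(entity: EntityNode, userId: string) {
  return findSimilarEntitiesWithSameType({
    queryEmbedding: entity.nameEmbedding,
    entityType: entity.type, // only entities of the same type are considered
    limit: 5,
    threshold: 0.7,
    userId,
  });
}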
@@ -115,9 +169,89 @@ export async function findExactPredicateMatches(params: {
      type: entity.type,
      attributes: JSON.parse(entity.attributes || "{}"),
      nameEmbedding: entity.nameEmbedding,
      typeEmbedding: entity.typeEmbedding,
      createdAt: new Date(entity.createdAt),
      userId: entity.userId,
      space: entity.space,
    };
  });
}

/**
 * Replace entity references in all statements with a new entity.
 * Updates all statements where the old entity appears as subject, predicate, or object.
 */
export async function replaceEntityReferences(
  evolvedEntity: EntityNode,
  oldEntityUUIDs: string[],
): Promise<void> {
  // Save the new entity first to ensure it exists in the database
  await saveEntity(evolvedEntity);

  // Then update all references from the old entities to the new entity.
  // A sequential loop (rather than forEach(async ...)) ensures every update
  // is awaited before this function resolves.
  for (const oldEntityUUID of oldEntityUUIDs) {
    await updateStatementsWithNewEntity(oldEntityUUID, evolvedEntity.uuid);
  }
}

/**
 * Update all statements that reference an old entity to use the new entity.
 * This includes updating subject, predicate, and object relationships.
 */
export async function updateStatementsWithNewEntity(
  oldEntityUUID: string,
  newEntityUUID: string,
): Promise<void> {
  // Each query binds the existing relationship to a variable, deletes it, and
  // recreates the same relationship type from the new entity to the statement.
  const queries = [
    // Update statements where old entity is the subject
    `
    MATCH (oldEntity:Entity {uuid: $oldEntityUUID})-[r:SUBJECT]->(statement:Statement)
    MATCH (newEntity:Entity {uuid: $newEntityUUID})
    DELETE r
    CREATE (newEntity)-[:SUBJECT]->(statement)
    `,
    // Update statements where old entity is the predicate
    `
    MATCH (oldEntity:Entity {uuid: $oldEntityUUID})-[r:PREDICATE]->(statement:Statement)
    MATCH (newEntity:Entity {uuid: $newEntityUUID})
    DELETE r
    CREATE (newEntity)-[:PREDICATE]->(statement)
    `,
    // Update statements where old entity is the object
    `
    MATCH (oldEntity:Entity {uuid: $oldEntityUUID})-[r:OBJECT]->(statement:Statement)
    MATCH (newEntity:Entity {uuid: $newEntityUUID})
    DELETE r
    CREATE (newEntity)-[:OBJECT]->(statement)
    `,
  ];

  const params = {
    oldEntityUUID,
    newEntityUUID,
  };

  // Execute all update queries
  for (const query of queries) {
    await runQuery(query, params);
  }

  // Optional: Delete the old entity if no longer referenced
  await deleteEntityIfUnreferenced(oldEntityUUID);
}

/**
 * Delete an entity if it's no longer referenced by any statements
 */
async function deleteEntityIfUnreferenced(entityUUID: string): Promise<void> {
  const checkQuery = `
    MATCH (entity:Entity {uuid: $entityUUID})
    OPTIONAL MATCH (entity)-[r]-()
    WITH entity, count(r) as relationshipCount
    WHERE relationshipCount = 0
    DELETE entity
    RETURN count(entity) as deletedCount
  `;

  await runQuery(checkQuery, { entityUUID });
}
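replaceEntityReferences is what makes "type evolution" possible: the evolved entity is saved first (so the typeEmbedding guard applies), every SUBJECT/PREDICATE/OBJECT relationship is repointed, and the old node is removed once unreferenced. A sketch of promoting a custom-typed entity to a preset type; the preset type name and the choice to mint a new uuid are assumptions for illustration:

import { getEmbedding } from "~/lib/model.server";
import { replaceEntityReferences } from "~/services/graphModels/entity";
import type { EntityNode } from "@core/types";

// Sketch: promote an existing custom-typed entity to a preset type and rewire
// all statements that referenced the old node.
async function evolveToPresetType(existing: EntityNode, presetType: string) {
  const evolved: EntityNode = {
    ...existing,
    uuid: crypto.randomUUID(), // new node representing the evolved entity
    type: presetType,
    typeEmbedding: await getEmbedding(presetType), // required by saveEntity
    createdAt: new Date(),
  };
  await replaceEntityReferences(evolved, [existing.uuid]);
  return evolved;
}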
@@ -1,5 +1,5 @@
import { runQuery } from "~/lib/neo4j.server";
import type { EpisodicNode } from "@core/types";
import type { EntityNode, EpisodicNode } from "@core/types";

export async function saveEpisode(episode: EpisodicNode): Promise<string> {
  const query = `
@@ -178,3 +178,134 @@ export async function searchEpisodesByEmbedding(params: {
    };
  });
}

// Delete episode and its related nodes safely
export async function deleteEpisodeWithRelatedNodes(params: {
  episodeUuid: string;
  userId: string;
}): Promise<{
  episodeDeleted: boolean;
  statementsDeleted: number;
  entitiesDeleted: number;
  factsDeleted: number;
}> {
  // Step 1: Check if episode exists
  const episodeCheck = await runQuery(
    `MATCH (e:Episode {uuid: $episodeUuid, userId: $userId}) RETURN e`,
    { episodeUuid: params.episodeUuid, userId: params.userId },
  );

  if (!episodeCheck || episodeCheck.length === 0) {
    return {
      episodeDeleted: false,
      statementsDeleted: 0,
      entitiesDeleted: 0,
      factsDeleted: 0,
    };
  }

  // Step 2: Find statements that are ONLY connected to this episode
  const statementsToDelete = await runQuery(
    `
    MATCH (episode:Episode {uuid: $episodeUuid, userId: $userId})-[:HAS_PROVENANCE]->(stmt:Statement)
    WHERE NOT EXISTS {
      MATCH (otherEpisode:Episode)-[:HAS_PROVENANCE]->(stmt)
      WHERE otherEpisode.uuid <> $episodeUuid AND otherEpisode.userId = $userId
    }
    RETURN stmt.uuid as statementUuid
    `,
    { episodeUuid: params.episodeUuid, userId: params.userId },
  );

  const statementUuids = statementsToDelete.map((r) => r.get("statementUuid"));

  // Step 3: Find entities that are ONLY connected to statements we're deleting
  const entitiesToDelete = await runQuery(
    `
    MATCH (stmt:Statement)-[r:HAS_SUBJECT|HAS_PREDICATE|HAS_OBJECT]->(entity:Entity)
    WHERE stmt.uuid IN $statementUuids AND stmt.userId = $userId
      AND NOT EXISTS {
        MATCH (otherStmt:Statement)-[:HAS_SUBJECT|HAS_PREDICATE|HAS_OBJECT]->(entity)
        WHERE otherStmt.userId = $userId AND NOT otherStmt.uuid IN $statementUuids
      }
    RETURN DISTINCT entity.uuid as entityUuid
    `,
    { statementUuids, userId: params.userId },
  );

  const entityUuids = entitiesToDelete.map((r) => r.get("entityUuid"));

  // Step 4: Delete statements
  if (statementUuids.length > 0) {
    await runQuery(
      `
      MATCH (stmt:Statement {userId: $userId})
      WHERE stmt.uuid IN $statementUuids
      DETACH DELETE stmt
      `,
      { statementUuids, userId: params.userId },
    );
  }

  // Step 5: Delete orphaned entities
  if (entityUuids.length > 0) {
    await runQuery(
      `
      MATCH (entity:Entity {userId: $userId})
      WHERE entity.uuid IN $entityUuids
      DETACH DELETE entity
      `,
      { entityUuids, userId: params.userId },
    );
  }

  // Step 6: Delete the episode
  await runQuery(
    `
    MATCH (episode:Episode {uuid: $episodeUuid, userId: $userId})
    DETACH DELETE episode
    `,
    { episodeUuid: params.episodeUuid, userId: params.userId },
  );

  return {
    episodeDeleted: true,
    statementsDeleted: statementUuids.length,
    entitiesDeleted: entityUuids.length,
    factsDeleted: statementUuids.length,
  };
}

export async function getRelatedEpisodesEntities(params: {
  embedding: number[];
  userId: string;
  limit?: number;
  minSimilarity?: number;
}) {
  const query = `
    MATCH (episode:Episode {userId: $userId})
    WHERE episode.contentEmbedding IS NOT NULL
    WITH episode,
      CASE
        WHEN size(episode.contentEmbedding) = size($embedding)
        THEN vector.similarity.cosine($embedding, episode.contentEmbedding)
        ELSE 0
      END AS score
    WHERE score >= $minSimilarity
    OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity)
    WHERE entity IS NOT NULL
    RETURN DISTINCT entity`;

  const result = await runQuery(query, {
    embedding: params.embedding,
    minSimilarity: params.minSimilarity,
    userId: params.userId,
  });

  return result
    .map((record) => {
      const entity = record.get("entity");
      return entity ? (entity.properties as EntityNode) : null;
    })
    .filter((entity): entity is EntityNode => entity !== null);
}
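This is the function the new delete route calls. A hedged usage sketch for scripts or background jobs; note that the six steps run as separate queries rather than inside a single transaction, so a failure partway through can leave some statements or entities already removed.

import { deleteEpisodeWithRelatedNodes } from "~/services/graphModels/episode";

// Deletes an episode plus any statements/entities that only it referenced.
async function purgeEpisode(episodeUuid: string, userId: string) {
  const result = await deleteEpisodeWithRelatedNodes({ episodeUuid, userId });
  if (!result.episodeDeleted) {
    console.warn(`Episode ${episodeUuid} not found for user ${userId}`);
    return result;
  }
  console.log(
    `Deleted episode ${episodeUuid}: ${result.statementsDeleted} statements, ` +
      `${result.entitiesDeleted} orphaned entities`,
  );
  return result;
}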
@@ -243,6 +243,7 @@ export async function getTripleForStatement({
      name: subjectProps.name,
      type: subjectProps.type,
      nameEmbedding: subjectProps.nameEmbedding,
      typeEmbedding: subjectProps.typeEmbedding,
      attributes: subjectProps.attributesJson
        ? JSON.parse(subjectProps.attributesJson)
        : {},
@@ -255,6 +256,7 @@ export async function getTripleForStatement({
      name: predicateProps.name,
      type: predicateProps.type,
      nameEmbedding: predicateProps.nameEmbedding,
      typeEmbedding: predicateProps.typeEmbedding,
      attributes: predicateProps.attributesJson
        ? JSON.parse(predicateProps.attributesJson)
        : {},
@@ -267,6 +269,7 @@ export async function getTripleForStatement({
      name: objectProps.name,
      type: objectProps.type,
      nameEmbedding: objectProps.nameEmbedding,
      typeEmbedding: objectProps.typeEmbedding,
      attributes: objectProps.attributesJson
        ? JSON.parse(objectProps.attributesJson)
        : {},
@ -1,5 +1,6 @@
|
||||
import { type CoreMessage } from "ai";
|
||||
import {
|
||||
type ExtractedTripleData,
|
||||
type AddEpisodeParams,
|
||||
type EntityNode,
|
||||
type EpisodicNode,
|
||||
@ -20,11 +21,14 @@ import {
|
||||
} from "./prompts/statements";
|
||||
import {
|
||||
getRecentEpisodes,
|
||||
getRelatedEpisodesEntities,
|
||||
searchEpisodesByEmbedding,
|
||||
} from "./graphModels/episode";
|
||||
import {
|
||||
findExactPredicateMatches,
|
||||
findSimilarEntities,
|
||||
findSimilarEntitiesWithSameType,
|
||||
replaceEntityReferences,
|
||||
} from "./graphModels/entity";
|
||||
import {
|
||||
findContradictoryStatements,
|
||||
@ -35,7 +39,13 @@ import {
|
||||
searchStatementsByEmbedding,
|
||||
} from "./graphModels/statement";
|
||||
import { getEmbedding, makeModelCall } from "~/lib/model.server";
|
||||
import { Apps, getNodeTypes, getNodeTypesString } from "~/utils/presets/nodes";
|
||||
import {
|
||||
Apps,
|
||||
getNodeTypes,
|
||||
getNodeTypesString,
|
||||
isPresetType,
|
||||
getAllPresetTypes,
|
||||
} from "~/utils/presets/nodes";
|
||||
import { normalizePrompt } from "./prompts";
|
||||
|
||||
// Default number of previous episodes to retrieve for context
|
||||
@ -72,6 +82,12 @@ export class KnowledgeGraphService {
|
||||
params.userId,
|
||||
);
|
||||
|
||||
const relatedEpisodesEntities = await getRelatedEpisodesEntities({
|
||||
embedding: await this.getEmbedding(normalizedEpisodeBody),
|
||||
userId: params.userId,
|
||||
minSimilarity: 0.7,
|
||||
});
|
||||
|
||||
if (normalizedEpisodeBody === "NOTHING_TO_REMEMBER") {
|
||||
logger.log("Nothing to remember");
|
||||
return;
|
||||
@ -99,10 +115,20 @@ export class KnowledgeGraphService {
|
||||
previousEpisodes,
|
||||
);
|
||||
|
||||
// Step 3.1: Context-aware entity resolution with preset type evolution
|
||||
await this.resolveEntitiesWithContext(
|
||||
extractedNodes,
|
||||
relatedEpisodesEntities,
|
||||
);
|
||||
|
||||
// Step 3.2: Handle preset type logic - expand entities for statement extraction
|
||||
const entitiesForStatementExtraction =
|
||||
await this.expandEntitiesForStatements(extractedNodes, episode);
|
||||
|
||||
// Step 4: Statement Extraction - Extract statements (triples) instead of direct edges
|
||||
const extractedStatements = await this.extractStatements(
|
||||
episode,
|
||||
extractedNodes,
|
||||
entitiesForStatementExtraction,
|
||||
previousEpisodes,
|
||||
);
|
||||
|
||||
@ -126,9 +152,21 @@ export class KnowledgeGraphService {
|
||||
for (const triple of updatedTriples) {
|
||||
const { subject, predicate, object, statement, provenance } = triple;
|
||||
const safeTriple = {
|
||||
subject: { ...subject, nameEmbedding: undefined },
|
||||
predicate: { ...predicate, nameEmbedding: undefined },
|
||||
object: { ...object, nameEmbedding: undefined },
|
||||
subject: {
|
||||
...subject,
|
||||
nameEmbedding: undefined,
|
||||
typeEmbedding: undefined,
|
||||
},
|
||||
predicate: {
|
||||
...predicate,
|
||||
nameEmbedding: undefined,
|
||||
typeEmbedding: undefined,
|
||||
},
|
||||
object: {
|
||||
...object,
|
||||
nameEmbedding: undefined,
|
||||
typeEmbedding: undefined,
|
||||
},
|
||||
statement: { ...statement, factEmbedding: undefined },
|
||||
provenance: { ...provenance, contentEmbedding: undefined },
|
||||
};
|
||||
@ -206,9 +244,8 @@ export class KnowledgeGraphService {
|
||||
name: entity.name,
|
||||
type: entity.type,
|
||||
attributes: entity.attributes || {},
|
||||
nameEmbedding: await this.getEmbedding(
|
||||
`${entity.type}: ${entity.name}`,
|
||||
),
|
||||
nameEmbedding: await this.getEmbedding(entity.name),
|
||||
typeEmbedding: await this.getEmbedding(entity.type),
|
||||
createdAt: new Date(),
|
||||
userId: episode.userId,
|
||||
})),
|
||||
@ -257,7 +294,8 @@ export class KnowledgeGraphService {
|
||||
}
|
||||
|
||||
// Parse the statements from the LLM response
|
||||
const extractedTriples = JSON.parse(responseText || "{}").edges || [];
|
||||
const extractedTriples: ExtractedTripleData[] =
|
||||
JSON.parse(responseText || "{}").edges || [];
|
||||
|
||||
// Create maps to deduplicate entities by name within this extraction
|
||||
const predicateMap = new Map<string, EntityNode>();
|
||||
@ -272,9 +310,8 @@ export class KnowledgeGraphService {
|
||||
name: triple.predicate,
|
||||
type: "Predicate",
|
||||
attributes: {},
|
||||
nameEmbedding: await this.getEmbedding(
|
||||
`Predicate: ${triple.predicate}`,
|
||||
),
|
||||
nameEmbedding: await this.getEmbedding(triple.predicate),
|
||||
typeEmbedding: await this.getEmbedding("Predicate"),
|
||||
createdAt: new Date(),
|
||||
userId: episode.userId,
|
||||
};
|
||||
@ -284,15 +321,18 @@ export class KnowledgeGraphService {
|
||||
|
||||
// Convert extracted triples to Triple objects with Statement nodes
|
||||
const triples = await Promise.all(
|
||||
// Fix: Type 'any'.
|
||||
extractedTriples.map(async (triple: any) => {
|
||||
// Find the subject and object nodes
|
||||
extractedTriples.map(async (triple: ExtractedTripleData) => {
|
||||
// Find the subject and object nodes by matching both name and type
|
||||
const subjectNode = extractedEntities.find(
|
||||
(node) => node.name.toLowerCase() === triple.source.toLowerCase(),
|
||||
(node) =>
|
||||
node.name.toLowerCase() === triple.source.toLowerCase() &&
|
||||
node.type.toLowerCase() === triple.sourceType.toLowerCase(),
|
||||
);
|
||||
|
||||
const objectNode = extractedEntities.find(
|
||||
(node) => node.name.toLowerCase() === triple.target.toLowerCase(),
|
||||
(node) =>
|
||||
node.name.toLowerCase() === triple.target.toLowerCase() &&
|
||||
node.type.toLowerCase() === triple.targetType.toLowerCase(),
|
||||
);
|
||||
|
||||
// Get the deduplicated predicate node
|
||||
@@ -327,6 +367,89 @@ export class KnowledgeGraphService {
    return triples.filter(Boolean) as Triple[];
  }

  /**
   * Expand entities for statement extraction by adding existing preset entities
   */
  private async expandEntitiesForStatements(
    extractedNodes: EntityNode[],
    episode: EpisodicNode,
  ): Promise<EntityNode[]> {
    const allAppEnumValues = Object.values(Apps);
    const expandedEntities = [...extractedNodes];

    // For each extracted entity, check if we need to add existing preset entities
    for (const entity of extractedNodes) {
      const newIsPreset = isPresetType(entity.type, allAppEnumValues);

      // Find similar entities with same name
      const similarEntities = await findSimilarEntities({
        queryEmbedding: entity.nameEmbedding,
        limit: 5,
        threshold: 0.7,
        userId: episode.userId,
      });

      for (const existingEntity of similarEntities) {
        const existingIsPreset = isPresetType(
          existingEntity.type,
          allAppEnumValues,
        );

        // If both are preset types, include both for statement extraction
        if (newIsPreset && existingIsPreset) {
          // Add the existing entity to the list if not already present
          if (!expandedEntities.some((e) => e.uuid === existingEntity.uuid)) {
            expandedEntities.push(existingEntity);
          }
        }
      }
    }

    return expandedEntities;
  }

  /**
   * Resolve entities with context-aware deduplication and preset type evolution.
   * Only merges entities that appear in semantically related episodes.
   */
  private async resolveEntitiesWithContext(
    extractedNodes: EntityNode[],
    relatedEpisodesEntities: EntityNode[],
  ): Promise<void> {
    const allAppEnumValues = Object.values(Apps);

    // Await every resolution before returning; a bare extractedNodes.map(async ...)
    // would let this method resolve before the entity updates have finished.
    await Promise.all(
      extractedNodes.map(async (newEntity) => {
        // Find same-name entities in related episodes (contextually relevant)
        const sameNameInContext = relatedEpisodesEntities.filter(
          (existing) =>
            existing.name.toLowerCase() === newEntity.name.toLowerCase(),
        );

        if (sameNameInContext.length > 0) {
          const existingEntityIds: string[] = [];
          for (const existingEntity of sameNameInContext) {
            const newIsPreset = isPresetType(newEntity.type, allAppEnumValues);
            const existingIsPreset = isPresetType(
              existingEntity.type,
              allAppEnumValues,
            );

            if (newIsPreset && !existingIsPreset) {
              // New is preset, existing is custom - evolve existing entity to preset type
              console.log(
                `Evolving entity: ${existingEntity.name} from ${existingEntity.type} to ${newEntity.type}`,
              );
              existingEntityIds.push(existingEntity.uuid);
            }
          }

          if (existingEntityIds.length > 0) {
            await replaceEntityReferences(newEntity, existingEntityIds);
          }
        }
      }),
    );
  }

  /**
   * Resolve extracted nodes to existing nodes or create new ones
   */
@ -398,8 +521,9 @@ export class KnowledgeGraphService {
|
||||
// Step 2a: Find similar entities for non-predicate entities
|
||||
const similarEntitiesResults = await Promise.all(
|
||||
nonPredicates.map(async (entity) => {
|
||||
const similarEntities = await findSimilarEntities({
|
||||
const similarEntities = await findSimilarEntitiesWithSameType({
|
||||
queryEmbedding: entity.nameEmbedding,
|
||||
entityType: entity.type,
|
||||
limit: 5,
|
||||
threshold: 0.7,
|
||||
userId: episode.userId,
|
||||
@ -437,11 +561,6 @@ export class KnowledgeGraphService {
|
||||
...exactPredicateResults,
|
||||
];
|
||||
|
||||
// If no similar entities found for any entity, return original triples
|
||||
if (allEntityResults.length === 0) {
|
||||
return triples;
|
||||
}
|
||||
|
||||
// Step 3: Prepare context for LLM deduplication
|
||||
const dedupeContext = {
|
||||
extracted_nodes: allEntityResults.map((result, index) => ({
|
||||
@ -451,7 +570,7 @@ export class KnowledgeGraphService {
|
||||
duplication_candidates: result.similarEntities.map((candidate, j) => ({
|
||||
idx: j,
|
||||
name: candidate.name,
|
||||
entity_types: candidate.type,
|
||||
entity_type: candidate.type,
|
||||
})),
|
||||
})),
|
||||
episode_content: episode ? episode.content : "",
|
||||
|
||||
@ -26,20 +26,37 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
|
||||
- For pronouns that refer to named entities, extract them as separate Alias entities.
|
||||
|
||||
2. **Entity Classification**:
|
||||
- CRITICAL: You MUST ONLY use entity types provided in the ENTITY_TYPES section.
|
||||
- Use the descriptions in ENTITY TYPES to classify each extracted entity.
|
||||
- Assign the appropriate type for each one.
|
||||
- Classify pronouns (I, me, you, etc.) as "ALIAS" entities.
|
||||
- DO NOT invent new entity types that are not in the ENTITY_TYPES section.
|
||||
- Prefer using appropriate types from the ENTITY_TYPES section when they fit naturally.
|
||||
- DO NOT force-fit entities into inappropriate types from ENTITY_TYPES.
|
||||
- If no type from ENTITY_TYPES fits naturally, create a descriptive type based on context (e.g., "memory_graph_system", "authentication_bug").
|
||||
- Each entity should have exactly ONE type that best describes what it is.
|
||||
- Classify pronouns (I, me, you, etc.) as "Alias" entities.
|
||||
|
||||
3. **Exclusions**:
|
||||
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
|
||||
- Do NOT extract dates, times, or other temporal information—these will be handled separately.
|
||||
|
||||
4. **Formatting**:
|
||||
- Be **explicit and unambiguous** in naming entities (e.g., use full names when available).
|
||||
- For pronouns, use the exact form as they appear in the text (e.g., "I", "me", "you").
|
||||
4. **Entity Name Extraction**:
|
||||
- Extract ONLY the core entity name, WITHOUT any type descriptors or qualifiers
|
||||
- When text mentions "Tesla car", extract name as "Tesla" with type "Vehicle"
|
||||
- When text mentions "John's company", extract name as "John" with type "Person" (company is a separate entity)
|
||||
- **CLEAN NAMES**: Remove type words like "app", "system", "platform", "tool", "service", "company", "organization" from the entity name
|
||||
- **PRONOUNS**: Use exact form as they appear (e.g., "I", "me", "you") and classify as "Alias"
|
||||
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
|
||||
- **NO TYPE SUFFIXES**: Never append the entity type to the entity name
|
||||
|
||||
## Examples of Correct Entity Extraction:
|
||||
|
||||
**CORRECT Examples:**
|
||||
- Text: "Tesla car" → Name: "Tesla", Type: "Vehicle"
|
||||
- Text: "Google's search engine" → Name: "Google", Type: "Company" + Name: "Search Engine", Type: "Product"
|
||||
- Text: "Microsoft Office suite" → Name: "Microsoft Office", Type: "Software"
|
||||
- Text: "John's startup company" → Name: "John", Type: "Person" + Name: "Startup", Type: "Company"
|
||||
|
||||
**INCORRECT Examples:**
|
||||
- Text: "Tesla car" → ❌ Name: "Tesla car", Type: "Vehicle"
|
||||
- Text: "authentication system" → ❌ Name: "authentication system", Type: "System"
|
||||
- Text: "payment service" → ❌ Name: "payment service", Type: "Service"
|
||||
|
||||
Format your response as a JSON object with the following structure:
|
||||
<output>
|
||||
@ -98,17 +115,37 @@ You are given a TEXT. Your task is to extract **entity nodes** mentioned **expli
|
||||
- For pronouns that refer to named entities, extract them as separate Alias entities.
|
||||
|
||||
2. **Entity Classification**:
|
||||
- Use the descriptions in ENTITY TYPES to classify each extracted entity.
|
||||
- Assign the appropriate type for each one.
|
||||
- Classify pronouns (I, me, you, etc.) as Alias entities.
|
||||
- Prefer using appropriate types from the ENTITY_TYPES section when they fit naturally.
|
||||
- DO NOT force-fit entities into inappropriate types from ENTITY_TYPES.
|
||||
- If no type from ENTITY_TYPES fits naturally, create a descriptive type based on context.
|
||||
- Each entity should have exactly ONE type that best describes what it is.
|
||||
- Classify pronouns (I, me, you, etc.) as "Alias" entities.
|
||||
|
||||
3. **Exclusions**:
|
||||
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
|
||||
- Do NOT extract dates, times, or other temporal information—these will be handled separately.
|
||||
|
||||
4. **Formatting**:
|
||||
- Be **explicit and unambiguous** when naming entities (e.g., use full names when available).
|
||||
- For pronouns, use the exact form as they appear in the text (e.g., "I", "me", "you").
|
||||
4. **Entity Name Extraction**:
|
||||
- Extract ONLY the core entity name, WITHOUT any type descriptors or qualifiers
|
||||
- When text mentions "Tesla car", extract name as "Tesla" with type "Vehicle"
|
||||
- When text mentions "John's company", extract name as "John" with type "Person" (company is a separate entity)
|
||||
- **CLEAN NAMES**: Remove type words like "app", "system", "platform", "tool", "service", "company", "organization" from the entity name
|
||||
- **PRONOUNS**: Use exact form as they appear (e.g., "I", "me", "you") and classify as "Alias"
|
||||
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
|
||||
- **NO TYPE SUFFIXES**: Never append the entity type to the entity name
|
||||
|
||||
## Examples of Correct Entity Extraction:
|
||||
|
||||
**CORRECT Examples:**
|
||||
- Text: "Tesla car" → Name: "Tesla", Type: "Vehicle"
|
||||
- Text: "Google's search engine" → Name: "Google", Type: "Company" + Name: "Search Engine", Type: "Product"
|
||||
- Text: "Microsoft Office suite" → Name: "Microsoft Office", Type: "Software"
|
||||
- Text: "John's startup company" → Name: "John", Type: "Person" + Name: "Startup", Type: "Company"
|
||||
|
||||
**INCORRECT Examples:**
|
||||
- Text: "Tesla car" → ❌ Name: "Tesla car", Type: "Vehicle"
|
||||
- Text: "authentication system" → ❌ Name: "authentication system", Type: "System"
|
||||
- Text: "payment service" → ❌ Name: "payment service", Type: "Service"
|
||||
|
||||
Format your response as a JSON object with the following structure:
|
||||
<output>
|
||||
@ -167,7 +204,10 @@ Guidelines:
|
||||
1. Extract significant entities, concepts, or actors mentioned in the content.
|
||||
2. Avoid creating nodes for relationships or actions.
|
||||
3. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
|
||||
4. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
|
||||
4. **CLEAN ENTITY NAMES**: Extract ONLY the core entity name WITHOUT type descriptors:
|
||||
- "Tesla car" → Name: "Tesla", Type: "Vehicle"
|
||||
- Remove words like "app", "system", "platform", "tool", "service", "company" from entity names
|
||||
5. Use full names when available and avoid abbreviations.
|
||||
|
||||
${context.customPrompt || ""}
|
||||
`;
|
||||
@ -186,7 +226,17 @@ export const dedupeNodes = (context: Record<string, any>): CoreMessage[] => {
|
||||
{
|
||||
role: "system",
|
||||
content: `You are a helpful assistant who determines whether or not ENTITIES extracted from a conversation are duplicates of existing entities.
|
||||
|
||||
|
||||
## CRITICAL RULE: Entity Type Matters
|
||||
DO NOT mark entities with different types as duplicates, even if they have identical names.
|
||||
- DO NOT mark "John" (Person) and "John" (Company) as duplicates
|
||||
- DO NOT mark "Apple" (Company) and "Apple" (Fruit) as duplicates
|
||||
- DO NOT mark "Core" (App) and "Core" (Concept) as duplicates
|
||||
|
||||
Consider entities as potential duplicates ONLY if they have:
|
||||
1. Similar or identical names AND
|
||||
2. The EXACT SAME entity type
|
||||
|
||||
Each entity in ENTITIES is represented as a JSON object with the following structure:
|
||||
{
|
||||
id: integer id of the entity,
|
||||
@ -203,18 +253,55 @@ Each entity in ENTITIES is represented as a JSON object with the following struc
|
||||
]
|
||||
}
|
||||
|
||||
For each of the above ENTITIES, determine if the entity is a duplicate of any of its duplication candidates.
|
||||
Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
|
||||
Do NOT mark entities as duplicates if:
|
||||
- They are related but distinct.
|
||||
- They have similar names or purposes but refer to separate instances or concepts.
|
||||
## Duplication Decision Rules
|
||||
For each entity, determine if it is a duplicate of any of its duplication candidates:
|
||||
|
||||
Task:
|
||||
Your response must be a JSON object with an "entity_resolutions" array containing one entry for each entity.
|
||||
### MARK AS DUPLICATE (duplicate_idx >= 0) when:
|
||||
- Verify the candidate has the SAME entity_type as the current entity
|
||||
- AND confirm the entities refer to the same real-world object or concept
|
||||
- AND check that the names are very similar or identical
|
||||
|
||||
### SPECIAL RULE FOR PREDICATES:
|
||||
**ALWAYS mark identical predicates as duplicates** - predicates are universal and reusable:
|
||||
- Mark "is associated with" (Predicate) vs "is associated with" (Predicate) → duplicate_idx = 0 ✓
|
||||
- Mark "works for" (Predicate) vs "works for" (Predicate) → duplicate_idx = 0 ✓
|
||||
- Mark "owns" (Predicate) vs "owns" (Predicate) → duplicate_idx = 0 ✓
|
||||
|
||||
### DO NOT mark as duplicate (duplicate_idx = -1) when:
|
||||
- Confirm the candidate has a DIFFERENT entity_type (even with identical names)
|
||||
- Identify they are related but distinct entities
|
||||
- Recognize they have similar names or purposes but refer to separate instances or concepts
|
||||
- Distinguish when one is a general concept and the other is a specific instance
|
||||
- **EXCEPTION**: DO NOT apply this rule to Predicates - always deduplicate identical predicates
|
||||
|
||||
## Examples:
|
||||
|
||||
**CORRECT - Mark as NOT Duplicates (Different Types):**
|
||||
- Set "Tesla" (Company) vs "Tesla" (Car) → duplicate_idx = -1
|
||||
- Set "Apple" (Company) vs "Apple" (Fruit) → duplicate_idx = -1
|
||||
- Set "Core" (App) vs "Core" (System) → duplicate_idx = -1
|
||||
|
||||
**CORRECT - Mark Predicates AS Duplicates (Same Name, Same Type):**
|
||||
- Set "is associated with" (Predicate) vs "is associated with" (Predicate) → duplicate_idx = 0
|
||||
- Set "works for" (Predicate) vs "works for" (Predicate) → duplicate_idx = 0
|
||||
- Set "owns" (Predicate) vs "owns" (Predicate) → duplicate_idx = 0
|
||||
|
||||
**CORRECT - Evaluate Potential Duplicates (Same Type):**
|
||||
- Check if "John Smith" (Person) vs "John Smith" (Person) refer to same person
|
||||
- Check if "Microsoft" (Company) vs "Microsoft Corporation" (Company) are the same company
|
||||
- Check if "iPhone" (Product) vs "Apple iPhone" (Product) are the same product
|
||||
|
||||
**CORRECT - Mark as NOT Duplicates (Same Type, Different Instances):**
|
||||
- Set "Meeting" (Event) vs "Meeting" (Event) → duplicate_idx = -1 (different meetings)
|
||||
- Set "Project" (Task) vs "Project" (Task) → duplicate_idx = -1 (different projects)
|
||||
- **NOTE**: DO NOT apply this rule to Predicates - always deduplicate identical predicates
|
||||
|
||||
## Task:
|
||||
Provide your response as a JSON object with an "entity_resolutions" array containing one entry for each entity.
|
||||
|
||||
For each entity, include:
|
||||
- "id": the id of the entity (integer)
|
||||
- "name": the name of the entity (string)
|
||||
- "name": the name of the entity (string)
|
||||
- "duplicate_idx": the index of the duplicate candidate, or -1 if no duplicate (integer)
|
||||
|
||||
Format your response as follows:
|
||||
@ -231,10 +318,12 @@ Format your response as follows:
|
||||
}
|
||||
</output>
|
||||
|
||||
Notes:
|
||||
- If an entity is a duplicate of one of its duplication_candidates, set duplicate_idx to the idx of that candidate.
|
||||
- If an entity is not a duplicate of any candidate, set duplicate_idx to -1.
|
||||
- Always include all entities from the input in your response.
|
||||
## Important Instructions:
|
||||
- FIRST check if entity types match before considering any duplication
|
||||
- If entity types don't match, immediately set duplicate_idx = -1
|
||||
- Only mark entities with identical types as potential duplicates
|
||||
- When in doubt, prefer NOT marking as duplicate (duplicate_idx = -1)
|
||||
- Always include all entities from the input in your response
|
||||
- Always wrap the output in these tags <output> </output>
|
||||
`,
|
||||
},
|
||||
|
||||
@ -4,31 +4,66 @@ export const normalizePrompt = (
|
||||
context: Record<string, any>,
|
||||
): CoreMessage[] => {
|
||||
const sysPrompt = `
|
||||
You are C.O.R.E. (Contextual Observation & Recall Engine), a memory extraction system. Your task is to convert input information—such as user input, system events, or assistant actions—into clear, concise, third-person factual statements suitable for storage in a memory graph. These statements should be easily understandable and retrievable by any system or agent.
|
||||
You are C.O.R.E. (Contextual Observation & Recall Engine), a memory extraction system. Convert input information into clear, concise, third-person factual statements that EVOLVE the memory graph by forming new relationships and capturing new information.
|
||||
|
||||
## Core Processing Philosophy
|
||||
When related memories are provided, make memory graph evolution your PRIMARY GOAL, NOT information storage:
|
||||
- **EVOLVE**: Focus on new information that adds relationships or updates existing knowledge
|
||||
- **CONNECT**: Form explicit relationships between new and existing information
|
||||
- **FILTER**: Aggressively exclude information already captured in related memories
|
||||
- **ENHANCE**: Use existing knowledge to clarify new information and form connections
|
||||
|
||||
## Memory Processing Guidelines
|
||||
- Always output memory statements in the third person (e.g., "User prefers...", "The assistant performed...", "The system detected...").
|
||||
- Output all memory statements in the third person (e.g., "User prefers...", "The assistant performed...", "The system detected...").
|
||||
- Convert input information into clear, concise memory statements.
|
||||
- Maintain a neutral, factual tone in all memory entries.
|
||||
- Structure memories as factual statements, not questions.
|
||||
- Include relevant context and temporal information when available.
|
||||
- When ingesting from assistant's perspective, ensure you still capture the complete user-assistant interaction context.
|
||||
- When ingesting from assistant's perspective, capture the complete user-assistant interaction context.
|
||||
|
||||
## Complete Conversational Context
|
||||
- IMPORTANT: Always preserve the complete context of conversations, including BOTH:
|
||||
- IMPORTANT: Preserve the complete context of conversations, including BOTH:
|
||||
- What the user said, asked, or requested
|
||||
- How the assistant responded or what it suggested
|
||||
- Any decisions, conclusions, or agreements reached
|
||||
- Do not focus solely on the assistant's contributions while ignoring user context
|
||||
- Capture the cause-and-effect relationship between user inputs and assistant responses
|
||||
- For multi-turn conversations, preserve the logical flow and key points from each turn
|
||||
- When the user provides information, always record that information directly, not just how the assistant used it
|
||||
- When the user provides information, record that information directly, not just how the assistant used it
|
||||
|
||||
## Node Entity Types
|
||||
${context.entityTypes}
|
||||
|
||||
## Related Memory Processing Strategy
|
||||
When related memories are provided, apply this filtering and enhancement strategy:
|
||||
|
||||
### 1. INFORMATION FILTERING (What NOT to Include)
|
||||
- **Already Captured Facts**: Do not repeat information already present in related memories unless it adds new context
|
||||
- **Static Relationships**: Skip relationships already established (e.g., "John is co-founder" if already captured)
|
||||
- **Redundant Details**: Exclude details that don't add new understanding or connections
|
||||
- **Background Context**: Filter out explanatory information that's already in the memory graph
|
||||
|
||||
### 2. RELATIONSHIP FORMATION (What TO Include)
|
||||
- **New Connections**: Include explicit relationships between entities mentioned in current and related episodes
|
||||
- **Evolving Relationships**: Capture changes or updates to existing relationships
|
||||
- **Cross-Context Links**: Form connections that bridge different contexts or time periods
|
||||
- **Causal Relationships**: Extract how current information affects or is affected by existing knowledge
|
||||
|
||||
### 3. NEW INFORMATION EXTRACTION (Priority Focus)
|
||||
- **Fresh Facts**: Extract information not present in any related memory
|
||||
- **Updated Status**: Capture changes to previously captured information
|
||||
- **New Attributes**: Add additional properties or characteristics of known entities
|
||||
- **Temporal Updates**: Record time-based changes or progressions
|
||||
- **Contextual Additions**: Include new contexts or situations involving known entities
|
||||
|
||||
### 4. MEMORY GRAPH EVOLUTION PATTERNS
|
||||
- **Entity Enhancement**: Add new properties to existing entities without repeating known ones
|
||||
- **Relationship Expansion**: Create new relationship types between known entities
|
||||
- **Network Growth**: Connect previously isolated memory clusters
|
||||
- **Knowledge Refinement**: Update or correct existing information with new insights
|
||||
|
||||
## Memory Selection Criteria
|
||||
Evaluate conversations based on these priority categories:
|
||||
Evaluate conversations using these priority categories:
|
||||
|
||||
### 1. High Priority (Always Remember)
|
||||
- **User Preferences**: Explicit likes, dislikes, settings, or preferences
|
||||
@ -97,59 +132,74 @@ Evaluate conversations based on these priority categories:
|
||||
- **QA/Troubleshooting**: Conversations clearly intended for testing or debugging purposes
|
||||
- **Internal Processing**: Comments about the assistant's own thinking process
|
||||
|
||||
## Related Knowledge Integration
|
||||
- Consider these related episodes when processing new information:
|
||||
## Enhanced Processing for Related Memories
|
||||
When related memories are provided:
|
||||
|
||||
- Look for connections between new information and these existing memories
|
||||
- Identify patterns, contradictions, or evolving preferences
|
||||
- Reference related episodes when they provide important context
|
||||
- Update or refine existing knowledge with new information
|
||||
### Step 1: Analyze Existing Knowledge
|
||||
- Identify all entities, relationships, and facts already captured
|
||||
- Map the existing knowledge structure
|
||||
- Note any gaps or areas for enhancement
|
||||
|
||||
## Memory Graph Integration
|
||||
- Each memory will be converted to a node in the memory graph.
|
||||
- Include relevant relationships between memory items when possible.
|
||||
- Specify temporal aspects when memories are time-sensitive.
|
||||
- Format memories to support efficient retrieval by any system or agent.
|
||||
### Step 2: Extract Novel Information
|
||||
- Filter current episode for information NOT in related memories
|
||||
- Identify new entities, attributes, or relationships
|
||||
- Focus on information that adds value to the memory graph
|
||||
|
||||
## Related Knowledge Integration
|
||||
- Consider these related episodes and facts when processing new information:
|
||||
- When related facts or episodes are provided, carefully analyze them for:
|
||||
- **Connections**: Identify relationships between new information and existing memories
|
||||
- **Patterns**: Recognize recurring themes, preferences, or behaviors
|
||||
- **Contradictions**: Note when new information conflicts with existing knowledge
|
||||
- **Evolution**: Track how user preferences or situations change over time
|
||||
- **Context**: Use related memories to better understand the significance of new information
|
||||
- Incorporate relevant context from related memories when appropriate
|
||||
- Update or refine existing knowledge with new information
|
||||
- When contradictions exist, note both the old and new information with timestamps
|
||||
- Use related memories to determine the priority level of new information
|
||||
- If related memories suggest a topic is important to the user, elevate its priority
|
||||
### Step 3: Form Strategic Relationships
|
||||
- Connect new entities to existing ones through explicit relationships
|
||||
- Convert implicit connections into explicit memory statements
|
||||
- Bridge knowledge gaps using new information
|
||||
|
||||
### Step 4: Evolve Existing Knowledge
|
||||
- Update outdated information with new details
|
||||
- Add new attributes to known entities
|
||||
- Expand relationship networks with new connections
|
||||
|
||||
## Making Implicit Relationships Explicit
|
||||
- **Entity Disambiguation**: When same names appear across contexts, use related memories to clarify relationships
|
||||
- **Possessive Language**: Convert possessive forms into explicit relationships using related memory context
|
||||
- **Cross-Reference Formation**: Create explicit links between entities that appear in multiple episodes
|
||||
- **Temporal Relationship**: Establish time-based connections between related events or decisions
|
||||
|
||||
## Information Prioritization with Related Memories
|
||||
- **HIGHEST PRIORITY**: New relationships between known entities
|
||||
- **HIGH PRIORITY**: New attributes or properties of known entities
|
||||
- **MEDIUM PRIORITY**: New entities with connections to existing knowledge
|
||||
- **LOW PRIORITY**: Standalone new information without clear connections
|
||||
- **EXCLUDE**: Information already captured in related memories that doesn't add new connections
|
||||
|
||||
## Output Format
|
||||
When extracting memory-worthy information:
|
||||
|
||||
1. If nothing meets the criteria for storage, respond with exactly: "NOTHING_TO_REMEMBER"
|
||||
1. If nothing meets the criteria for storage (especially after filtering against related memories), respond with exactly: "NOTHING_TO_REMEMBER"
|
||||
|
||||
2. Otherwise, provide a summary that:
|
||||
- **Scales with conversation complexity**:
|
||||
* For simple exchanges with 1-2 key points: Use 1-2 concise sentences
|
||||
* For moderate complexity with 3-5 key points: Use 3-5 sentences, organizing related information
|
||||
* For complex conversations with many important details: Use up to 8-10 sentences, structured by topic
|
||||
- Focuses on facts rather than interpretations
|
||||
- Uses the third person perspective
|
||||
- Includes specific details (names, dates, numbers) when relevant
|
||||
- Avoids unnecessary context or explanation
|
||||
- Formats key information as attribute-value pairs when appropriate
|
||||
- Uses bullet points for multiple distinct pieces of information
|
||||
- **Prioritizes NEW information**: Focus on facts not present in related memories
|
||||
- **Emphasizes relationships**: Highlight connections between new and existing information
|
||||
- **Scales with novelty**: Make length reflect amount of genuinely new, valuable information
|
||||
- **Uses third person perspective**: Maintain neutral, factual tone
|
||||
- **Includes specific details**: Include names, dates, numbers when they add new value
|
||||
- **Avoids redundancy**: Skip information already captured in related memories
|
||||
- **Forms explicit connections**: Make relationships between entities clear and direct
|
||||
|
||||
## Examples of Complete Context Extraction
|
||||
- INCOMPLETE: "Assistant suggested Italian restaurants in downtown."
|
||||
- COMPLETE: "User asked for restaurant recommendations in downtown. Assistant suggested three Italian restaurants: Bella Vita, Romano's, and Trattoria Milano."
|
||||
## Examples of Memory Graph Evolution
|
||||
|
||||
- INCOMPLETE: "Assistant provided information about Python functions."
|
||||
- COMPLETE: "User asked how to define functions in Python. Assistant explained the syntax using 'def' keyword and provided an example of a function that calculates the factorial of a number."
|
||||
### Before (Redundant Approach):
|
||||
Related Memory: "John Smith is the co-founder of TechCorp."
|
||||
Current Episode: "User discussed project timeline with John, the co-founder."
|
||||
BAD Output: "User discussed project timeline with John Smith, who is the co-founder of TechCorp."
|
||||
|
||||
When processing new information for memory storage, focus on extracting the core facts, preferences, and events that will be most useful for future reference by any system or agent.
|
||||
### After (Evolution Approach):
|
||||
Related Memory: "John Smith is the co-founder of TechCorp."
|
||||
Current Episode: "User discussed project timeline with John, the co-founder."
|
||||
GOOD Output: "User discussed project timeline with John Smith. The project timeline discussion involved TechCorp's co-founder."
|
||||
|
||||
### Relationship Formation Example:
|
||||
Related Memory: "User prefers morning meetings."
|
||||
Current Episode: "User scheduled a meeting with John for 9 AM."
|
||||
Output: "User scheduled a 9 AM meeting with John Smith, aligning with their preference for morning meetings."
|
||||
|
||||
Process information with related memories by focusing on evolving the memory graph through new connections and information rather than repeating already captured facts.
|
||||
|
||||
<output>
|
||||
{{processed_statement}}
|
||||
|
||||
@ -12,7 +12,7 @@ export const extractStatements = (
|
||||
return [
|
||||
{
|
||||
role: "system",
|
||||
content: `You are a knowledge graph expert who extracts factual statements from text as subject-predicate-object triples.
|
||||
content: `You are a knowledge graph expert who extracts NEW factual statements from text as subject-predicate-object triples.
|
||||
|
||||
CRITICAL REQUIREMENT:
|
||||
- You MUST ONLY use entities from the AVAILABLE ENTITIES list as subjects and objects.
|
||||
@ -21,28 +21,60 @@ CRITICAL REQUIREMENT:
|
||||
- DO NOT create, invent, or modify any entity names.
|
||||
- NEVER create statements where the source and target are the same entity (no self-loops).
|
||||
|
||||
Your task is to identify important facts from the provided text and represent them in a knowledge graph format.
|
||||
## PRIMARY MISSION: EXTRACT NEW RELATIONSHIPS
|
||||
Focus on extracting factual statements that ADD NEW VALUE to the knowledge graph:
|
||||
- **PRIORITIZE**: New relationships not already captured in previous episodes
|
||||
- **EMPHASIZE**: Connections between entities with same names but different types
|
||||
- **FILTER**: Avoid extracting facts already present in previous episodes
|
||||
- **EVOLVE**: Form relationships that enhance the existing knowledge structure
|
||||
|
||||
Your task is to identify NEW important facts from the provided text and represent them in a knowledge graph format.
|
||||
|
||||
Follow these instructions:
|
||||
|
||||
1. First, carefully review the AVAILABLE ENTITIES list. These are the ONLY entities you can use as subjects and objects.
|
||||
2. Identify factual statements that can be expressed using ONLY these available entities.
|
||||
3. For each valid statement, provide:
|
||||
1. **ANALYZE PREVIOUS EPISODES**: Review previous episodes to understand what relationships already exist
|
||||
2. **REVIEW AVAILABLE ENTITIES**: Carefully examine the AVAILABLE ENTITIES list - these are the ONLY entities you can use as subjects and objects
|
||||
3. **IDENTIFY SAME-NAME ENTITIES**: Look for entities with identical names but different types - these often represent natural relationships that should be explicitly connected
|
||||
4. **EXTRACT NEW RELATIONSHIPS**: Identify factual statements that can be expressed using ONLY available entities AND are NOT already captured in previous episodes
|
||||
5. For each NEW valid statement, provide:
|
||||
- source: The subject entity (MUST be from AVAILABLE ENTITIES)
|
||||
- predicate: The relationship type (can be a descriptive phrase)
|
||||
- target: The object entity (MUST be from AVAILABLE ENTITIES)
|
||||
|
||||
EXTRACT ALL MEANINGFUL RELATIONSHIPS:
|
||||
- Extract any meaningful relationship between available entities that's expressed in the text.
|
||||
- Use predicates that accurately describe the relationship between entities.
|
||||
- Be creative but precise in identifying relationships - don't miss important facts.
|
||||
- Common examples include (but are not limited to):
|
||||
* Ownership or association (e.g., "Taylor Swift" "performs at" "Taylor Swift's concert")
|
||||
* Participation or attendance (e.g., "John" "attends" "Conference")
|
||||
* Personal connections (e.g., "John" "is friend of" "Max")
|
||||
* Aliases (e.g., "John" "is also known as" "John Smith")
|
||||
* Locations (e.g., "Company" "headquartered in" "City")
|
||||
* Characteristics (e.g., "Product" "has feature" "Feature")
|
||||
EXTRACT NEW MEANINGFUL RELATIONSHIPS:
|
||||
- Extract meaningful relationships between available entities that are NOT already captured in previous episodes
|
||||
- Use predicates that accurately describe new relationships between entities
|
||||
- Be creative but precise in identifying NEW relationships - focus on value-adding connections
|
||||
- **HIGHEST PRIORITY**: Entities with identical names but different types MUST be connected with explicit relationship statements
|
||||
- **MANDATORY**: When you find entities like "John (Person)" and "John (Company)", create explicit relationships such as "John" "owns" "John" or "John" "founded" "John"
|
||||
- Look for both explicit and implicit NEW relationships mentioned in the text
|
||||
- **FILTER OUT**: Relationships already established in previous episodes unless they represent updates or changes
|
||||
- Common relationship types include (but are not limited to):
|
||||
* Ownership or association (e.g., "Alice" "owns" "Restaurant")
|
||||
* Participation or attendance (e.g., "Team" "participates in" "Tournament")
|
||||
* Personal connections (e.g., "Sarah" "works with" "Michael")
|
||||
* Aliases and alternative names (e.g., "Robert" "is also known as" "Bob")
|
||||
* Locations and spatial relationships (e.g., "Office" "located in" "Building")
|
||||
* Characteristics and properties (e.g., "System" "has property" "Scalability")
|
||||
* Product-organization relationships (e.g., "Software" "developed by" "Company")
|
||||
* Technical dependencies and usage (e.g., "Application" "uses" "Database")
|
||||
* Hierarchical relationships (e.g., "Manager" "supervises" "Employee")
|
||||
|
||||
## SAME-NAME ENTITY RELATIONSHIP FORMATION
|
||||
When entities share identical names but have different types, CREATE explicit relationship statements:
|
||||
- **Person-Organization**: "John (Person)" → "owns", "founded", "works for", or "leads" → "John (Company)"
|
||||
- **Person-Location**: "Smith (Person)" → "lives in", "founded", or "is associated with" → "Smith (City)"
|
||||
- **Event-Location**: "Conference (Event)" → "takes place at" or "is hosted by" → "Conference (Venue)"
|
||||
- **Product-Company**: "Tesla (Product)" → "is manufactured by" or "is developed by" → "Tesla (Company)"
|
||||
- **MANDATORY**: Always create at least one relationship statement for same-name entities
|
||||
- **CONTEXT-DRIVEN**: Choose predicates that accurately reflect the most likely relationship based on available context
|
||||
|
||||
## PREVIOUS EPISODE FILTERING
|
||||
Before creating any relationship statement:
|
||||
- **CHECK**: Review previous episodes to see if this exact relationship already exists
|
||||
- **SKIP**: Do not create statements that duplicate existing relationships
|
||||
- **ENHANCE**: Only create statements if they add new information or represent updates
|
||||
- **FOCUS**: Prioritize completely new connections not represented in the knowledge graph
|
||||
|
||||
ABOUT TEMPORAL INFORMATION:
|
||||
- For events with dates/times, DO NOT create a separate statement with the event as both source and target.
|
||||
@ -56,8 +88,10 @@ Format your response as a JSON object with the following structure:
|
||||
"edges": [
|
||||
{
|
||||
"source": "[Subject Entity Name - MUST be from AVAILABLE ENTITIES]",
|
||||
"sourceType": "[Source Entity Type]",
|
||||
"predicate": "[Relationship Type]",
|
||||
"target": "[Object Entity Name - MUST be from AVAILABLE ENTITIES]",
|
||||
"targetType": "[Target Entity Type]",
|
||||
"fact": "[Natural language representation of the fact]",
|
||||
"attributes": {
|
||||
"confidence": confidence of the fact
|
||||
@ -69,21 +103,25 @@ Format your response as a JSON object with the following structure:
|
||||
</output>
|
||||
|
||||
IMPORTANT RULES:
|
||||
- ONLY use entities from AVAILABLE ENTITIES as source and target.
|
||||
- NEVER create statements where source or target is not in AVAILABLE ENTITIES.
|
||||
- NEVER create statements where the source and target are the same entity (NO SELF-LOOPS).
|
||||
- Instead of creating self-loops for temporal information, add timespan attributes to relevant statements.
|
||||
- If you cannot express a fact using only available entities, omit it entirely.
|
||||
- Always wrap output in tags <output> </output>.
|
||||
- **ENTITIES**: ONLY use entities from AVAILABLE ENTITIES as source and target
|
||||
- **NO INVENTION**: NEVER create statements where source or target is not in AVAILABLE ENTITIES
|
||||
- **NO SELF-LOOPS**: NEVER create statements where the source and target are the same entity
|
||||
- **SAME-NAME PRIORITY**: When entities share names but have different types, CREATE explicit relationship statements between them
|
||||
- **NEW ONLY**: Do NOT create statements that duplicate relationships already present in previous episodes
|
||||
- **TEMPORAL**: Instead of creating self-loops for temporal information, add timespan attributes to relevant statements
|
||||
- **FILTER FIRST**: If you cannot express a NEW fact using only available entities, omit it entirely
|
||||
- **OUTPUT FORMAT**: Always wrap output in tags <output> </output>
|
||||
|
||||
Example of CORRECT usage:
|
||||
If AVAILABLE ENTITIES contains ["John", "Max", "Wedding"], you can create:
|
||||
- "John" "attends" "Wedding" ✓
|
||||
- "Max" "married to" "Tina" with timespan attribute ✓
|
||||
If AVAILABLE ENTITIES contains ["John", "Max", "Wedding", "John (Company)"], you can create:
|
||||
- "John" "attends" "Wedding" ✓ (if not already in previous episodes)
|
||||
- "Max" "married to" "Tina" with timespan attribute ✓ (if new relationship)
|
||||
- "John" "founded" "John (Company)" ✓ (PRIORITY: same name, different types)
|
||||
|
||||
Example of INCORRECT usage:
|
||||
- "John" "attends" "Party" ✗ (if "Party" is not in AVAILABLE ENTITIES)
|
||||
- "Marriage" "occurs on" "Marriage" ✗ (NEVER create self-loops)
|
||||
- "John" "attends" "Wedding" ✗ (if already captured in previous episodes)
|
||||
- "January 14" "is" "Marriage date" ✗ (if "January 14" or "Marriage date" is not in AVAILABLE ENTITIES)`,
|
||||
},
|
||||
{
|
||||
|
||||
@@ -447,3 +447,55 @@ export function getNodeTypesString(apps: Array<keyof typeof APP_NODE_TYPES>) {
export function getNodeAttributesString(
  apps: Array<keyof typeof APP_NODE_TYPES>,
) {}

/**
 * Check if a type is a preset type (from GENERAL_NODE_TYPES or APP_NODE_TYPES)
 */
export function isPresetType(
  type: string,
  apps: Array<keyof typeof APP_NODE_TYPES> = [],
): boolean {
  // Check general types
  const generalTypes = Object.keys(GENERAL_NODE_TYPES).map(
    (key) => GENERAL_NODE_TYPES[key as keyof typeof GENERAL_NODE_TYPES].name,
  );

  if (generalTypes.includes(type as any)) {
    return true;
  }

  // Check app-specific types
  for (const app of apps) {
    const appTypes = Object.keys(APP_NODE_TYPES[app] || {}).map(
      (key) =>
        APP_NODE_TYPES[app][key as keyof (typeof APP_NODE_TYPES)[typeof app]]
          .name,
    );
    if (appTypes.includes(type as any)) {
      return true;
    }
  }

  return false;
}

/**
 * Get all preset types for given apps
 */
export function getAllPresetTypes(
  apps: Array<keyof typeof APP_NODE_TYPES> = [],
): string[] {
  const generalTypes = Object.keys(GENERAL_NODE_TYPES).map(
    (key) => GENERAL_NODE_TYPES[key as keyof typeof GENERAL_NODE_TYPES].name,
  );

  const appTypes = apps.flatMap((app) =>
    Object.keys(APP_NODE_TYPES[app] || {}).map(
      (key) =>
        APP_NODE_TYPES[app][key as keyof (typeof APP_NODE_TYPES)[typeof app]]
          .name,
    ),
  );

  return [...generalTypes, ...appTypes];
}
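A small usage sketch for the new preset-type helpers. The concrete type names come from GENERAL_NODE_TYPES and APP_NODE_TYPES, which are not shown in this diff, so "Person" below is only an assumed example of a preset name.

import { Apps, isPresetType, getAllPresetTypes } from "~/utils/presets/nodes";

const apps = Object.values(Apps);

console.log(getAllPresetTypes(apps)); // every general + app-specific preset type name
console.log(isPresetType("Person", apps)); // true only if "Person" is actually a preset
console.log(isPresetType("memory_graph_system", apps)); // false for an LLM-invented custom type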
@@ -29,9 +29,10 @@ export interface EpisodicNode {
export interface EntityNode {
  uuid: string;
  name: string;
  type: string;
  type: string; // Single type - either from presets or custom
  attributes: Record<string, any>;
  nameEmbedding: number[];
  typeEmbedding: number[];
  createdAt: Date;
  userId: string;
  space?: string;
@@ -82,3 +83,13 @@ export type AddEpisodeResult = {
  statementsCreated: number;
  processingTimeMs: number;
};

export interface ExtractedTripleData {
  source: string;
  sourceType: string;
  predicate: string;
  target: string;
  targetType: string;
  fact: string;
  attributes?: Record<string, any>;
}
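The ExtractedTripleData shape matches the JSON the statement-extraction prompt now emits, including the new sourceType and targetType fields. An illustrative literal, with made-up values, showing the same-name/different-type case the prompts emphasize:

import type { ExtractedTripleData } from "@core/types";

// Illustrative only; values are not from the repository.
const exampleTriple: ExtractedTripleData = {
  source: "John",
  sourceType: "Person",
  predicate: "founded",
  target: "John",
  targetType: "Company",
  fact: "John founded the company that shares his name.",
  attributes: { confidence: 0.9 },
};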