Mirror of https://github.com/eliasstepanik/core.git, synced 2026-01-12 00:28:26 +00:00

Feat: Delete episode nodes
Enhance knowledge graphs with implicit relationships:
- Added a new API route for deleting episodes, including related statements and entities.
- Introduced error handling for unauthorized access and non-existent episodes.
- Enhanced the KnowledgeGraphService with methods for resolving entities and managing relationships during deletions.
- Updated entity and episode models to support the new deletion logic and ensure data integrity.
This commit is contained in:
parent 50c4e2bcce
commit 28803bec17

apps/webapp/app/routes/api.v1.episode.delete.tsx (new file, 60 lines)

@@ -0,0 +1,60 @@
+import { z } from "zod";
+import { json } from "@remix-run/node";
+import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server";
+import { deleteEpisodeWithRelatedNodes } from "~/services/graphModels/episode";
+
+export const DeleteEpisodeBodyRequest = z.object({
+  episodeUuid: z.string().uuid("Episode UUID must be a valid UUID"),
+});
+
+const { action, loader } = createActionApiRoute(
+  {
+    body: DeleteEpisodeBodyRequest,
+    allowJWT: true,
+    method: "DELETE",
+    authorization: {
+      action: "delete",
+    },
+    corsStrategy: "all",
+  },
+  async ({ body, authentication }) => {
+    try {
+      const result = await deleteEpisodeWithRelatedNodes({
+        episodeUuid: body.episodeUuid,
+        userId: authentication.userId,
+      });
+
+      if (!result.episodeDeleted) {
+        return json(
+          {
+            error: "Episode not found or unauthorized",
+            code: "not_found",
+          },
+          { status: 404 },
+        );
+      }
+
+      return json({
+        success: true,
+        message: "Episode deleted successfully",
+        deleted: {
+          episode: result.episodeDeleted,
+          statements: result.statementsDeleted,
+          entities: result.entitiesDeleted,
+          facts: result.factsDeleted,
+        },
+      });
+    } catch (error) {
+      console.error("Error deleting episode:", error);
+      return json(
+        {
+          error: "Failed to delete episode",
+          code: "internal_error",
+        },
+        { status: 500 },
+      );
+    }
+  },
+);
+
+export { action, loader };
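
For orientation, a minimal client-side sketch of calling this route. The /api/v1/episode/delete path is inferred from the Remix flat-route filename, and the Bearer header from allowJWT: true; both are assumptions rather than something this commit documents.

// Hypothetical usage sketch — endpoint path and auth scheme are assumed, not confirmed by the diff.
async function deleteEpisode(episodeUuid: string, token: string) {
  const res = await fetch("/api/v1/episode/delete", {
    method: "DELETE",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${token}`,
    },
    body: JSON.stringify({ episodeUuid }),
  });

  if (!res.ok) {
    // The route returns 404 (not_found) or 500 (internal_error) on failure.
    throw new Error(`Delete failed with status ${res.status}`);
  }

  // Success payload shape mirrors the json() call in the route above.
  return (await res.json()) as {
    success: boolean;
    message: string;
    deleted: { episode: boolean; statements: number; entities: number; facts: number };
  };
}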

graphModels/entity.ts

@@ -2,6 +2,19 @@ import type { EntityNode } from "@core/types";
 import { runQuery } from "~/lib/neo4j.server";
 
 export async function saveEntity(entity: EntityNode): Promise<string> {
+  // Debug: Log entity to identify missing typeEmbedding
+  if (!entity.typeEmbedding) {
+    console.error(`Entity missing typeEmbedding:`, {
+      uuid: entity.uuid,
+      name: entity.name,
+      type: entity.type,
+      hasNameEmbedding: !!entity.nameEmbedding,
+    });
+    throw new Error(
+      `Entity ${entity.name} (${entity.type}) is missing typeEmbedding`,
+    );
+  }
+
   const query = `
     MERGE (n:Entity {uuid: $uuid})
     ON CREATE SET
@@ -9,6 +22,7 @@ export async function saveEntity(entity: EntityNode): Promise<string> {
       n.type = $type,
       n.attributes = $attributes,
       n.nameEmbedding = $nameEmbedding,
+      n.typeEmbedding = $typeEmbedding,
      n.createdAt = $createdAt,
      n.userId = $userId,
      n.space = $space
@@ -17,6 +31,7 @@ export async function saveEntity(entity: EntityNode): Promise<string> {
      n.type = $type,
      n.attributes = $attributes,
      n.nameEmbedding = $nameEmbedding,
+      n.typeEmbedding = $typeEmbedding,
      n.space = $space
    RETURN n.uuid as uuid
  `;
@@ -27,6 +42,7 @@ export async function saveEntity(entity: EntityNode): Promise<string> {
    type: entity.type,
    attributes: JSON.stringify(entity.attributes || {}),
    nameEmbedding: entity.nameEmbedding,
+    typeEmbedding: entity.typeEmbedding,
    createdAt: entity.createdAt.toISOString(),
    userId: entity.userId,
    space: entity.space || null,
@@ -52,6 +68,7 @@ export async function getEntity(uuid: string): Promise<EntityNode | null> {
    type: entity.type,
    attributes: JSON.parse(entity.attributes || "{}"),
    nameEmbedding: entity.nameEmbedding,
+    typeEmbedding: entity.typeEmbedding,
    createdAt: new Date(entity.createdAt),
    userId: entity.userId,
    space: entity.space,
@@ -85,6 +102,43 @@ export async function findSimilarEntities(params: {
      type: entity.type,
      attributes: JSON.parse(entity.attributes || "{}"),
      nameEmbedding: entity.nameEmbedding,
+      typeEmbedding: entity.typeEmbedding,
+      createdAt: new Date(entity.createdAt),
+      userId: entity.userId,
+      space: entity.space,
+    };
+  });
+}
+
+export async function findSimilarEntitiesWithSameType(params: {
+  queryEmbedding: number[];
+  entityType: string;
+  limit: number;
+  threshold: number;
+  userId: string;
+}): Promise<EntityNode[]> {
+  const query = `
+    MATCH (entity:Entity)
+    WHERE entity.nameEmbedding IS NOT NULL
+    WITH entity, vector.similarity.cosine($queryEmbedding, entity.nameEmbedding) AS score
+    WHERE score >= $threshold
+      AND entity.userId = $userId
+      AND entity.type = $entityType
+    RETURN entity, score
+    ORDER BY score DESC
+  `;
+
+  const result = await runQuery(query, params);
+  return result.map((record) => {
+    const entity = record.get("entity").properties;
+
+    return {
+      uuid: entity.uuid,
+      name: entity.name,
+      type: entity.type,
+      attributes: JSON.parse(entity.attributes || "{}"),
+      nameEmbedding: entity.nameEmbedding,
+      typeEmbedding: entity.typeEmbedding,
      createdAt: new Date(entity.createdAt),
      userId: entity.userId,
      space: entity.space,
@@ -115,9 +169,89 @@ export async function findExactPredicateMatches(params: {
      type: entity.type,
      attributes: JSON.parse(entity.attributes || "{}"),
      nameEmbedding: entity.nameEmbedding,
+      typeEmbedding: entity.typeEmbedding,
      createdAt: new Date(entity.createdAt),
      userId: entity.userId,
      space: entity.space,
    };
  });
 }
+
+/**
+ * Replace entity references in all statements with a new entity
+ * Updates all statements where the old entity appears as subject, predicate, or object
+ */
+export async function replaceEntityReferences(
+  evolvedEntity: EntityNode,
+  oldEntityUUIDs: string[],
+): Promise<void> {
+  // Save the new entity first to ensure it exists in the database
+  await saveEntity(evolvedEntity);
+
+  // Then update all references from old entity to new entity
+  for (const oldEntityUUID of oldEntityUUIDs) {
+    await updateStatementsWithNewEntity(oldEntityUUID, evolvedEntity.uuid);
+  }
+}
+
+/**
+ * Update all statements that reference an old entity to use the new entity
+ * This includes updating subject, predicate, and object relationships
+ */
+export async function updateStatementsWithNewEntity(
+  oldEntityUUID: string,
+  newEntityUUID: string,
+): Promise<void> {
+  const queries = [
+    // Update statements where old entity is the subject
+    `
+    MATCH (oldEntity:Entity {uuid: $oldEntityUUID})-[r:SUBJECT]->(statement:Statement)
+    MATCH (newEntity:Entity {uuid: $newEntityUUID})
+    DELETE r
+    CREATE (newEntity)-[:SUBJECT]->(statement)
+    `,
+    // Update statements where old entity is the predicate
+    `
+    MATCH (oldEntity:Entity {uuid: $oldEntityUUID})-[r:PREDICATE]->(statement:Statement)
+    MATCH (newEntity:Entity {uuid: $newEntityUUID})
+    DELETE r
+    CREATE (newEntity)-[:PREDICATE]->(statement)
+    `,
+    // Update statements where old entity is the object
+    `
+    MATCH (oldEntity:Entity {uuid: $oldEntityUUID})-[r:OBJECT]->(statement:Statement)
+    MATCH (newEntity:Entity {uuid: $newEntityUUID})
+    DELETE r
+    CREATE (newEntity)-[:OBJECT]->(statement)
+    `,
+  ];
+
+  const params = {
+    oldEntityUUID,
+    newEntityUUID,
+  };
+
+  // Execute all update queries
+  for (const query of queries) {
+    await runQuery(query, params);
+  }
+
+  // Optional: Delete the old entity if no longer referenced
+  await deleteEntityIfUnreferenced(oldEntityUUID);
+}
+
+/**
+ * Delete an entity if it's no longer referenced by any statements
+ */
+async function deleteEntityIfUnreferenced(entityUUID: string): Promise<void> {
+  const checkQuery = `
+    MATCH (entity:Entity {uuid: $entityUUID})
+    OPTIONAL MATCH (entity)-[r]-()
+    WITH entity, count(r) as relationshipCount
+    WHERE relationshipCount = 0
+    DELETE entity
+    RETURN count(entity) as deletedCount
+  `;
+
+  await runQuery(checkQuery, { entityUUID });
+}
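
A minimal sketch of how the new helpers might compose during entity evolution. The import path mirrors the episode import used by the route above, and the caller is assumed to already hold an EntityNode with a computed nameEmbedding; treat it as an illustration, not project code.

// Hypothetical sketch: fold same-type look-alikes into an evolved entity.
import type { EntityNode } from "@core/types";
import {
  findSimilarEntitiesWithSameType,
  replaceEntityReferences,
} from "~/services/graphModels/entity"; // assumed path

async function mergeLookalikes(evolved: EntityNode): Promise<void> {
  // Candidates share the entity's type and are semantically close by name embedding.
  const candidates = await findSimilarEntitiesWithSameType({
    queryEmbedding: evolved.nameEmbedding,
    entityType: evolved.type,
    limit: 5,
    threshold: 0.7,
    userId: evolved.userId,
  });

  // Re-point SUBJECT/PREDICATE/OBJECT references from the look-alikes to the evolved entity.
  const oldUuids = candidates
    .filter((candidate) => candidate.uuid !== evolved.uuid)
    .map((candidate) => candidate.uuid);

  if (oldUuids.length > 0) {
    await replaceEntityReferences(evolved, oldUuids);
  }
}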

graphModels/episode.ts

@@ -1,5 +1,5 @@
 import { runQuery } from "~/lib/neo4j.server";
-import type { EpisodicNode } from "@core/types";
+import type { EntityNode, EpisodicNode } from "@core/types";
 
 export async function saveEpisode(episode: EpisodicNode): Promise<string> {
   const query = `
@@ -178,3 +178,134 @@ export async function searchEpisodesByEmbedding(params: {
    };
  });
 }
+
+// Delete episode and its related nodes safely
+export async function deleteEpisodeWithRelatedNodes(params: {
+  episodeUuid: string;
+  userId: string;
+}): Promise<{
+  episodeDeleted: boolean;
+  statementsDeleted: number;
+  entitiesDeleted: number;
+  factsDeleted: number;
+}> {
+  // Step 1: Check if episode exists
+  const episodeCheck = await runQuery(
+    `MATCH (e:Episode {uuid: $episodeUuid, userId: $userId}) RETURN e`,
+    { episodeUuid: params.episodeUuid, userId: params.userId },
+  );
+
+  if (!episodeCheck || episodeCheck.length === 0) {
+    return {
+      episodeDeleted: false,
+      statementsDeleted: 0,
+      entitiesDeleted: 0,
+      factsDeleted: 0,
+    };
+  }
+
+  // Step 2: Find statements that are ONLY connected to this episode
+  const statementsToDelete = await runQuery(
+    `
+    MATCH (episode:Episode {uuid: $episodeUuid, userId: $userId})-[:HAS_PROVENANCE]->(stmt:Statement)
+    WHERE NOT EXISTS {
+      MATCH (otherEpisode:Episode)-[:HAS_PROVENANCE]->(stmt)
+      WHERE otherEpisode.uuid <> $episodeUuid AND otherEpisode.userId = $userId
+    }
+    RETURN stmt.uuid as statementUuid
+    `,
+    { episodeUuid: params.episodeUuid, userId: params.userId },
+  );
+
+  const statementUuids = statementsToDelete.map((r) => r.get("statementUuid"));
+
+  // Step 3: Find entities that are ONLY connected to statements we're deleting
+  const entitiesToDelete = await runQuery(
+    `
+    MATCH (stmt:Statement)-[r:HAS_SUBJECT|HAS_PREDICATE|HAS_OBJECT]->(entity:Entity)
+    WHERE stmt.uuid IN $statementUuids AND stmt.userId = $userId
+      AND NOT EXISTS {
+        MATCH (otherStmt:Statement)-[:HAS_SUBJECT|HAS_PREDICATE|HAS_OBJECT]->(entity)
+        WHERE otherStmt.userId = $userId AND NOT otherStmt.uuid IN $statementUuids
+      }
+    RETURN DISTINCT entity.uuid as entityUuid
+    `,
+    { statementUuids, userId: params.userId },
+  );
+
+  const entityUuids = entitiesToDelete.map((r) => r.get("entityUuid"));
+
+  // Step 4: Delete statements
+  if (statementUuids.length > 0) {
+    await runQuery(
+      `
+      MATCH (stmt:Statement {userId: $userId})
+      WHERE stmt.uuid IN $statementUuids
+      DETACH DELETE stmt
+      `,
+      { statementUuids, userId: params.userId },
+    );
+  }
+
+  // Step 5: Delete orphaned entities
+  if (entityUuids.length > 0) {
+    await runQuery(
+      `
+      MATCH (entity:Entity {userId: $userId})
+      WHERE entity.uuid IN $entityUuids
+      DETACH DELETE entity
+      `,
+      { entityUuids, userId: params.userId },
+    );
+  }
+
+  // Step 6: Delete the episode
+  await runQuery(
+    `
+    MATCH (episode:Episode {uuid: $episodeUuid, userId: $userId})
+    DETACH DELETE episode
+    `,
+    { episodeUuid: params.episodeUuid, userId: params.userId },
+  );
+
+  return {
+    episodeDeleted: true,
+    statementsDeleted: statementUuids.length,
+    entitiesDeleted: entityUuids.length,
+    factsDeleted: statementUuids.length,
+  };
+}
+
+export async function getRelatedEpisodesEntities(params: {
+  embedding: number[];
+  userId: string;
+  limit?: number;
+  minSimilarity?: number;
+}) {
+  const query = `
+    MATCH (episode:Episode {userId: $userId})
+    WHERE episode.contentEmbedding IS NOT NULL
+    WITH episode,
+      CASE
+        WHEN size(episode.contentEmbedding) = size($embedding)
+        THEN vector.similarity.cosine($embedding, episode.contentEmbedding)
+        ELSE 0
+      END AS score
+    WHERE score >= $minSimilarity
+    OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity)
+    WHERE entity IS NOT NULL
+    RETURN DISTINCT entity`;
+
+  const result = await runQuery(query, {
+    embedding: params.embedding,
+    minSimilarity: params.minSimilarity,
+    userId: params.userId,
+  });
+
+  return result
+    .map((record) => {
+      const entity = record.get("entity");
+      return entity ? (entity.properties as EntityNode) : null;
+    })
+    .filter((entity): entity is EntityNode => entity !== null);
+}
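
A small sketch of driving deleteEpisodeWithRelatedNodes from application code. Only the function's own signature is relied on; the import path is assumed, so treat this as illustration.

// Hypothetical sketch: delete an episode and report what was removed.
import { deleteEpisodeWithRelatedNodes } from "~/services/graphModels/episode";

async function purgeEpisode(episodeUuid: string, userId: string) {
  const result = await deleteEpisodeWithRelatedNodes({ episodeUuid, userId });

  if (!result.episodeDeleted) {
    // Episode missing or owned by another user; nothing was removed.
    return null;
  }

  // statementsDeleted/entitiesDeleted only count nodes with no remaining provenance elsewhere.
  console.log(
    `Removed ${result.statementsDeleted} statements and ${result.entitiesDeleted} entities`,
  );
  return result;
}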

graphModels/statement.ts

@@ -243,6 +243,7 @@ export async function getTripleForStatement({
      name: subjectProps.name,
      type: subjectProps.type,
      nameEmbedding: subjectProps.nameEmbedding,
+      typeEmbedding: subjectProps.typeEmbedding,
      attributes: subjectProps.attributesJson
        ? JSON.parse(subjectProps.attributesJson)
        : {},
@@ -255,6 +256,7 @@ export async function getTripleForStatement({
      name: predicateProps.name,
      type: predicateProps.type,
      nameEmbedding: predicateProps.nameEmbedding,
+      typeEmbedding: predicateProps.typeEmbedding,
      attributes: predicateProps.attributesJson
        ? JSON.parse(predicateProps.attributesJson)
        : {},
@@ -267,6 +269,7 @@ export async function getTripleForStatement({
      name: objectProps.name,
      type: objectProps.type,
      nameEmbedding: objectProps.nameEmbedding,
+      typeEmbedding: objectProps.typeEmbedding,
      attributes: objectProps.attributesJson
        ? JSON.parse(objectProps.attributesJson)
        : {},
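
For reference, the entity shape these mappings imply. The real EntityNode definition lives in @core/types and is not part of this diff, so field names are taken from the code above and the optionality of space is an assumption.

// Assumed shape, reconstructed from the mapping code in this commit; not the actual @core/types definition.
interface EntityNodeSketch {
  uuid: string;
  name: string;
  type: string;
  attributes: Record<string, unknown>;
  nameEmbedding: number[];
  typeEmbedding: number[]; // newly persisted and required by saveEntity in this commit
  createdAt: Date;
  userId: string;
  space?: string | null;
}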

KnowledgeGraphService

@@ -1,5 +1,6 @@
 import { type CoreMessage } from "ai";
 import {
+  type ExtractedTripleData,
   type AddEpisodeParams,
   type EntityNode,
   type EpisodicNode,
@@ -20,11 +21,14 @@ import {
 } from "./prompts/statements";
 import {
   getRecentEpisodes,
+  getRelatedEpisodesEntities,
   searchEpisodesByEmbedding,
 } from "./graphModels/episode";
 import {
   findExactPredicateMatches,
   findSimilarEntities,
+  findSimilarEntitiesWithSameType,
+  replaceEntityReferences,
 } from "./graphModels/entity";
 import {
   findContradictoryStatements,
@@ -35,7 +39,13 @@ import {
   searchStatementsByEmbedding,
 } from "./graphModels/statement";
 import { getEmbedding, makeModelCall } from "~/lib/model.server";
-import { Apps, getNodeTypes, getNodeTypesString } from "~/utils/presets/nodes";
+import {
+  Apps,
+  getNodeTypes,
+  getNodeTypesString,
+  isPresetType,
+  getAllPresetTypes,
+} from "~/utils/presets/nodes";
 import { normalizePrompt } from "./prompts";
 
 // Default number of previous episodes to retrieve for context
@@ -72,6 +82,12 @@ export class KnowledgeGraphService {
      params.userId,
    );
 
+    const relatedEpisodesEntities = await getRelatedEpisodesEntities({
+      embedding: await this.getEmbedding(normalizedEpisodeBody),
+      userId: params.userId,
+      minSimilarity: 0.7,
+    });
+
    if (normalizedEpisodeBody === "NOTHING_TO_REMEMBER") {
      logger.log("Nothing to remember");
      return;
@@ -99,10 +115,20 @@ export class KnowledgeGraphService {
      previousEpisodes,
    );
 
+    // Step 3.1: Context-aware entity resolution with preset type evolution
+    await this.resolveEntitiesWithContext(
+      extractedNodes,
+      relatedEpisodesEntities,
+    );
+
+    // Step 3.2: Handle preset type logic - expand entities for statement extraction
+    const entitiesForStatementExtraction =
+      await this.expandEntitiesForStatements(extractedNodes, episode);
+
    // Step 4: Statement Extraction - Extract statements (triples) instead of direct edges
    const extractedStatements = await this.extractStatements(
      episode,
-      extractedNodes,
+      entitiesForStatementExtraction,
      previousEpisodes,
    );
 
@@ -126,9 +152,21 @@ export class KnowledgeGraphService {
    for (const triple of updatedTriples) {
      const { subject, predicate, object, statement, provenance } = triple;
      const safeTriple = {
-        subject: { ...subject, nameEmbedding: undefined },
-        predicate: { ...predicate, nameEmbedding: undefined },
-        object: { ...object, nameEmbedding: undefined },
+        subject: {
+          ...subject,
+          nameEmbedding: undefined,
+          typeEmbedding: undefined,
+        },
+        predicate: {
+          ...predicate,
+          nameEmbedding: undefined,
+          typeEmbedding: undefined,
+        },
+        object: {
+          ...object,
+          nameEmbedding: undefined,
+          typeEmbedding: undefined,
+        },
        statement: { ...statement, factEmbedding: undefined },
        provenance: { ...provenance, contentEmbedding: undefined },
      };
@@ -206,9 +244,8 @@ export class KnowledgeGraphService {
        name: entity.name,
        type: entity.type,
        attributes: entity.attributes || {},
-        nameEmbedding: await this.getEmbedding(
-          `${entity.type}: ${entity.name}`,
-        ),
+        nameEmbedding: await this.getEmbedding(entity.name),
+        typeEmbedding: await this.getEmbedding(entity.type),
        createdAt: new Date(),
        userId: episode.userId,
      })),
@@ -257,7 +294,8 @@ export class KnowledgeGraphService {
    }
 
    // Parse the statements from the LLM response
-    const extractedTriples = JSON.parse(responseText || "{}").edges || [];
+    const extractedTriples: ExtractedTripleData[] =
+      JSON.parse(responseText || "{}").edges || [];
 
    // Create maps to deduplicate entities by name within this extraction
    const predicateMap = new Map<string, EntityNode>();
@@ -272,9 +310,8 @@ export class KnowledgeGraphService {
        name: triple.predicate,
        type: "Predicate",
        attributes: {},
-        nameEmbedding: await this.getEmbedding(
-          `Predicate: ${triple.predicate}`,
-        ),
+        nameEmbedding: await this.getEmbedding(triple.predicate),
+        typeEmbedding: await this.getEmbedding("Predicate"),
        createdAt: new Date(),
        userId: episode.userId,
      };
@@ -284,15 +321,18 @@ export class KnowledgeGraphService {
 
    // Convert extracted triples to Triple objects with Statement nodes
    const triples = await Promise.all(
-      // Fix: Type 'any'.
-      extractedTriples.map(async (triple: any) => {
-        // Find the subject and object nodes
+      extractedTriples.map(async (triple: ExtractedTripleData) => {
+        // Find the subject and object nodes by matching both name and type
        const subjectNode = extractedEntities.find(
-          (node) => node.name.toLowerCase() === triple.source.toLowerCase(),
+          (node) =>
+            node.name.toLowerCase() === triple.source.toLowerCase() &&
+            node.type.toLowerCase() === triple.sourceType.toLowerCase(),
        );
 
        const objectNode = extractedEntities.find(
-          (node) => node.name.toLowerCase() === triple.target.toLowerCase(),
+          (node) =>
+            node.name.toLowerCase() === triple.target.toLowerCase() &&
+            node.type.toLowerCase() === triple.targetType.toLowerCase(),
        );
 
        // Get the deduplicated predicate node
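
The ExtractedTripleData type referenced above is imported alongside EntityNode (most likely from @core/types, though the import's source line sits outside the visible hunk). The sketch below lists only the fields the mapping code visibly reads, so it is probably incomplete.

// Assumed shape, limited to the fields used above (triple.source, sourceType, target, targetType, predicate).
interface ExtractedTripleDataSketch {
  source: string;      // subject entity name
  sourceType: string;  // subject entity type
  predicate: string;   // relationship name, deduplicated via predicateMap
  target: string;      // object entity name
  targetType: string;  // object entity type
}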
@@ -327,6 +367,89 @@ export class KnowledgeGraphService {
    return triples.filter(Boolean) as Triple[];
  }
 
+  /**
+   * Expand entities for statement extraction by adding existing preset entities
+   */
+  private async expandEntitiesForStatements(
+    extractedNodes: EntityNode[],
+    episode: EpisodicNode,
+  ): Promise<EntityNode[]> {
+    const allAppEnumValues = Object.values(Apps);
+    const expandedEntities = [...extractedNodes];
+
+    // For each extracted entity, check if we need to add existing preset entities
+    for (const entity of extractedNodes) {
+      const newIsPreset = isPresetType(entity.type, allAppEnumValues);
+
+      // Find similar entities with same name
+      const similarEntities = await findSimilarEntities({
+        queryEmbedding: entity.nameEmbedding,
+        limit: 5,
+        threshold: 0.7,
+        userId: episode.userId,
+      });
+
+      for (const existingEntity of similarEntities) {
+        const existingIsPreset = isPresetType(
+          existingEntity.type,
+          allAppEnumValues,
+        );
+
+        // If both are preset types, include both for statement extraction
+        if (newIsPreset && existingIsPreset) {
+          // Add the existing entity to the list if not already present
+          if (!expandedEntities.some((e) => e.uuid === existingEntity.uuid)) {
+            expandedEntities.push(existingEntity);
+          }
+        }
+      }
+    }
+
+    return expandedEntities;
+  }
+
+  /**
+   * Resolve entities with context-aware deduplication and preset type evolution
+   * Only merges entities that appear in semantically related episodes
+   */
+  private async resolveEntitiesWithContext(
+    extractedNodes: EntityNode[],
+    relatedEpisodesEntities: EntityNode[],
+  ): Promise<void> {
+    const allAppEnumValues = Object.values(Apps);
+
+    // Await every entity so reference replacements finish before later pipeline steps run
+    await Promise.all(
+      extractedNodes.map(async (newEntity) => {
+        // Find same-name entities in related episodes (contextually relevant)
+        const sameNameInContext = relatedEpisodesEntities.filter(
+          (existing) =>
+            existing.name.toLowerCase() === newEntity.name.toLowerCase(),
+        );
+
+        if (sameNameInContext.length > 0) {
+          const existingEntityIds: string[] = [];
+          for (const existingEntity of sameNameInContext) {
+            const newIsPreset = isPresetType(newEntity.type, allAppEnumValues);
+            const existingIsPreset = isPresetType(
+              existingEntity.type,
+              allAppEnumValues,
+            );
+
+            if (newIsPreset && !existingIsPreset) {
+              // New is preset, existing is custom - evolve existing entity to preset type
+              console.log(
+                `Evolving entity: ${existingEntity.name} from ${existingEntity.type} to ${newEntity.type}`,
+              );
+              existingEntityIds.push(existingEntity.uuid);
+            }
+          }
+
+          if (existingEntityIds.length > 0) {
+            await replaceEntityReferences(newEntity, existingEntityIds);
+          }
+        }
+      }),
+    );
+  }
+
  /**
   * Resolve extracted nodes to existing nodes or create new ones
   */
@@ -398,8 +521,9 @@ export class KnowledgeGraphService {
    // Step 2a: Find similar entities for non-predicate entities
    const similarEntitiesResults = await Promise.all(
      nonPredicates.map(async (entity) => {
-        const similarEntities = await findSimilarEntities({
+        const similarEntities = await findSimilarEntitiesWithSameType({
          queryEmbedding: entity.nameEmbedding,
+          entityType: entity.type,
          limit: 5,
          threshold: 0.7,
          userId: episode.userId,
@@ -437,11 +561,6 @@ export class KnowledgeGraphService {
      ...exactPredicateResults,
    ];
 
-    // If no similar entities found for any entity, return original triples
-    if (allEntityResults.length === 0) {
-      return triples;
-    }
-
    // Step 3: Prepare context for LLM deduplication
    const dedupeContext = {
      extracted_nodes: allEntityResults.map((result, index) => ({
@@ -451,7 +570,7 @@ export class KnowledgeGraphService {
        duplication_candidates: result.similarEntities.map((candidate, j) => ({
          idx: j,
          name: candidate.name,
-          entity_types: candidate.type,
+          entity_type: candidate.type,
        })),
      })),
      episode_content: episode ? episode.content : "",
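
The two new private methods encode a small decision table around preset types. A condensed sketch of that logic, written as a standalone helper purely for illustration (it is not part of the commit):

// Hypothetical helper summarizing the preset-type decisions made above.
type EvolutionAction = "evolve-existing" | "keep-both" | "no-action";

function decidePresetAction(newIsPreset: boolean, existingIsPreset: boolean): EvolutionAction {
  if (newIsPreset && !existingIsPreset) {
    // resolveEntitiesWithContext: upgrade the custom entity to the preset type
    // by re-pointing its statement references to the new entity.
    return "evolve-existing";
  }
  if (newIsPreset && existingIsPreset) {
    // expandEntitiesForStatements: keep both entities visible to statement extraction.
    return "keep-both";
  }
  return "no-action";
}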

prompts — entity extraction & dedupeNodes

@@ -26,20 +26,37 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
 - For pronouns that refer to named entities, extract them as separate Alias entities.
 
 2. **Entity Classification**:
-- CRITICAL: You MUST ONLY use entity types provided in the ENTITY_TYPES section.
-- Use the descriptions in ENTITY TYPES to classify each extracted entity.
-- Assign the appropriate type for each one.
-- Classify pronouns (I, me, you, etc.) as "ALIAS" entities.
-- DO NOT invent new entity types that are not in the ENTITY_TYPES section.
+- Prefer using appropriate types from the ENTITY_TYPES section when they fit naturally.
+- DO NOT force-fit entities into inappropriate types from ENTITY_TYPES.
+- If no type from ENTITY_TYPES fits naturally, create a descriptive type based on context (e.g., "memory_graph_system", "authentication_bug").
+- Each entity should have exactly ONE type that best describes what it is.
+- Classify pronouns (I, me, you, etc.) as "Alias" entities.
 
 3. **Exclusions**:
 - Do NOT extract entities representing relationships or actions (predicates will be handled separately).
 - Do NOT extract dates, times, or other temporal information—these will be handled separately.
 
-4. **Formatting**:
-- Be **explicit and unambiguous** in naming entities (e.g., use full names when available).
-- For pronouns, use the exact form as they appear in the text (e.g., "I", "me", "you").
+4. **Entity Name Extraction**:
+- Extract ONLY the core entity name, WITHOUT any type descriptors or qualifiers
+- When text mentions "Tesla car", extract name as "Tesla" with type "Vehicle"
+- When text mentions "John's company", extract name as "John" with type "Person" (company is a separate entity)
+- **CLEAN NAMES**: Remove type words like "app", "system", "platform", "tool", "service", "company", "organization" from the entity name
+- **PRONOUNS**: Use exact form as they appear (e.g., "I", "me", "you") and classify as "Alias"
+- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
+- **NO TYPE SUFFIXES**: Never append the entity type to the entity name
+
+## Examples of Correct Entity Extraction:
+
+**CORRECT Examples:**
+- Text: "Tesla car" → Name: "Tesla", Type: "Vehicle"
+- Text: "Google's search engine" → Name: "Google", Type: "Company" + Name: "Search Engine", Type: "Product"
+- Text: "Microsoft Office suite" → Name: "Microsoft Office", Type: "Software"
+- Text: "John's startup company" → Name: "John", Type: "Person" + Name: "Startup", Type: "Company"
+
+**INCORRECT Examples:**
+- Text: "Tesla car" → ❌ Name: "Tesla car", Type: "Vehicle"
+- Text: "authentication system" → ❌ Name: "authentication system", Type: "System"
+- Text: "payment service" → ❌ Name: "payment service", Type: "Service"
 
 Format your response as a JSON object with the following structure:
 <output>
@@ -98,17 +115,37 @@ You are given a TEXT. Your task is to extract **entity nodes** mentioned **expli
 - For pronouns that refer to named entities, extract them as separate Alias entities.
 
 2. **Entity Classification**:
-- Use the descriptions in ENTITY TYPES to classify each extracted entity.
-- Assign the appropriate type for each one.
-- Classify pronouns (I, me, you, etc.) as Alias entities.
+- Prefer using appropriate types from the ENTITY_TYPES section when they fit naturally.
+- DO NOT force-fit entities into inappropriate types from ENTITY_TYPES.
+- If no type from ENTITY_TYPES fits naturally, create a descriptive type based on context.
+- Each entity should have exactly ONE type that best describes what it is.
+- Classify pronouns (I, me, you, etc.) as "Alias" entities.
 
 3. **Exclusions**:
 - Do NOT extract entities representing relationships or actions (predicates will be handled separately).
 - Do NOT extract dates, times, or other temporal information—these will be handled separately.
 
-4. **Formatting**:
-- Be **explicit and unambiguous** when naming entities (e.g., use full names when available).
-- For pronouns, use the exact form as they appear in the text (e.g., "I", "me", "you").
+4. **Entity Name Extraction**:
+- Extract ONLY the core entity name, WITHOUT any type descriptors or qualifiers
+- When text mentions "Tesla car", extract name as "Tesla" with type "Vehicle"
+- When text mentions "John's company", extract name as "John" with type "Person" (company is a separate entity)
+- **CLEAN NAMES**: Remove type words like "app", "system", "platform", "tool", "service", "company", "organization" from the entity name
+- **PRONOUNS**: Use exact form as they appear (e.g., "I", "me", "you") and classify as "Alias"
+- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
+- **NO TYPE SUFFIXES**: Never append the entity type to the entity name
+
+## Examples of Correct Entity Extraction:
+
+**CORRECT Examples:**
+- Text: "Tesla car" → Name: "Tesla", Type: "Vehicle"
+- Text: "Google's search engine" → Name: "Google", Type: "Company" + Name: "Search Engine", Type: "Product"
+- Text: "Microsoft Office suite" → Name: "Microsoft Office", Type: "Software"
+- Text: "John's startup company" → Name: "John", Type: "Person" + Name: "Startup", Type: "Company"
+
+**INCORRECT Examples:**
+- Text: "Tesla car" → ❌ Name: "Tesla car", Type: "Vehicle"
+- Text: "authentication system" → ❌ Name: "authentication system", Type: "System"
+- Text: "payment service" → ❌ Name: "payment service", Type: "Service"
 
 Format your response as a JSON object with the following structure:
 <output>
@@ -167,7 +204,10 @@ Guidelines:
 1. Extract significant entities, concepts, or actors mentioned in the content.
 2. Avoid creating nodes for relationships or actions.
 3. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
-4. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
+4. **CLEAN ENTITY NAMES**: Extract ONLY the core entity name WITHOUT type descriptors:
+   - "Tesla car" → Name: "Tesla", Type: "Vehicle"
+   - Remove words like "app", "system", "platform", "tool", "service", "company" from entity names
+5. Use full names when available and avoid abbreviations.
 
 ${context.customPrompt || ""}
 `;
@@ -187,6 +227,16 @@ export const dedupeNodes = (context: Record<string, any>): CoreMessage[] => {
      role: "system",
      content: `You are a helpful assistant who determines whether or not ENTITIES extracted from a conversation are duplicates of existing entities.
+
+## CRITICAL RULE: Entity Type Matters
+DO NOT mark entities with different types as duplicates, even if they have identical names.
+- DO NOT mark "John" (Person) and "John" (Company) as duplicates
+- DO NOT mark "Apple" (Company) and "Apple" (Fruit) as duplicates
+- DO NOT mark "Core" (App) and "Core" (Concept) as duplicates
+
+Consider entities as potential duplicates ONLY if they have:
+1. Similar or identical names AND
+2. The EXACT SAME entity type
 
 Each entity in ENTITIES is represented as a JSON object with the following structure:
 {
   id: integer id of the entity,
@@ -203,14 +253,51 @@ Each entity in ENTITIES is represented as a JSON object with the following struc
 ]
 }
 
-For each of the above ENTITIES, determine if the entity is a duplicate of any of its duplication candidates.
-Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
-Do NOT mark entities as duplicates if:
-- They are related but distinct.
-- They have similar names or purposes but refer to separate instances or concepts.
-
-Task:
-Your response must be a JSON object with an "entity_resolutions" array containing one entry for each entity.
+## Duplication Decision Rules
+For each entity, determine if it is a duplicate of any of its duplication candidates:
+
+### MARK AS DUPLICATE (duplicate_idx >= 0) when:
+- Verify the candidate has the SAME entity_type as the current entity
+- AND confirm the entities refer to the same real-world object or concept
+- AND check that the names are very similar or identical
+
+### SPECIAL RULE FOR PREDICATES:
+**ALWAYS mark identical predicates as duplicates** - predicates are universal and reusable:
+- Mark "is associated with" (Predicate) vs "is associated with" (Predicate) → duplicate_idx = 0 ✓
+- Mark "works for" (Predicate) vs "works for" (Predicate) → duplicate_idx = 0 ✓
+- Mark "owns" (Predicate) vs "owns" (Predicate) → duplicate_idx = 0 ✓
+
+### DO NOT mark as duplicate (duplicate_idx = -1) when:
+- Confirm the candidate has a DIFFERENT entity_type (even with identical names)
+- Identify they are related but distinct entities
+- Recognize they have similar names or purposes but refer to separate instances or concepts
+- Distinguish when one is a general concept and the other is a specific instance
+- **EXCEPTION**: DO NOT apply this rule to Predicates - always deduplicate identical predicates
+
+## Examples:
+
+**CORRECT - Mark as NOT Duplicates (Different Types):**
+- Set "Tesla" (Company) vs "Tesla" (Car) → duplicate_idx = -1
+- Set "Apple" (Company) vs "Apple" (Fruit) → duplicate_idx = -1
+- Set "Core" (App) vs "Core" (System) → duplicate_idx = -1
+
+**CORRECT - Mark Predicates AS Duplicates (Same Name, Same Type):**
+- Set "is associated with" (Predicate) vs "is associated with" (Predicate) → duplicate_idx = 0
+- Set "works for" (Predicate) vs "works for" (Predicate) → duplicate_idx = 0
+- Set "owns" (Predicate) vs "owns" (Predicate) → duplicate_idx = 0
+
+**CORRECT - Evaluate Potential Duplicates (Same Type):**
+- Check if "John Smith" (Person) vs "John Smith" (Person) refer to same person
+- Check if "Microsoft" (Company) vs "Microsoft Corporation" (Company) are the same company
+- Check if "iPhone" (Product) vs "Apple iPhone" (Product) are the same product
+
+**CORRECT - Mark as NOT Duplicates (Same Type, Different Instances):**
+- Set "Meeting" (Event) vs "Meeting" (Event) → duplicate_idx = -1 (different meetings)
+- Set "Project" (Task) vs "Project" (Task) → duplicate_idx = -1 (different projects)
+- **NOTE**: DO NOT apply this rule to Predicates - always deduplicate identical predicates
+
+## Task:
+Provide your response as a JSON object with an "entity_resolutions" array containing one entry for each entity.
 
 For each entity, include:
 - "id": the id of the entity (integer)
@@ -231,10 +318,12 @@ Format your response as follows:
 }
 </output>
 
-Notes:
-- If an entity is a duplicate of one of its duplication_candidates, set duplicate_idx to the idx of that candidate.
-- If an entity is not a duplicate of any candidate, set duplicate_idx to -1.
-- Always include all entities from the input in your response.
+## Important Instructions:
+- FIRST check if entity types match before considering any duplication
+- If entity types don't match, immediately set duplicate_idx = -1
+- Only mark entities with identical types as potential duplicates
+- When in doubt, prefer NOT marking as duplicate (duplicate_idx = -1)
+- Always include all entities from the input in your response
 - Always wrap the output in these tags <output> </output>
 `,
    },
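
For concreteness, a sketch of the kind of response the dedupeNodes prompt asks for. The full output schema sits in an unchanged part of the file, so only the fields named in the instructions (entity_resolutions, id, duplicate_idx) appear here; additional fields may be required.

// Hypothetical response object matching the fields the prompt text names explicitly.
const exampleDedupeResponse = {
  entity_resolutions: [
    { id: 0, duplicate_idx: -1 }, // "Apple" (Company) vs "Apple" (Fruit): types differ, never a duplicate
    { id: 1, duplicate_idx: 0 },  // "works for" (Predicate) vs "works for" (Predicate): always deduplicated
  ],
};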

prompts — normalizePrompt

@@ -4,31 +4,66 @@ export const normalizePrompt = (
   context: Record<string, any>,
 ): CoreMessage[] => {
   const sysPrompt = `
-You are C.O.R.E. (Contextual Observation & Recall Engine), a memory extraction system. Your task is to convert input information—such as user input, system events, or assistant actions—into clear, concise, third-person factual statements suitable for storage in a memory graph. These statements should be easily understandable and retrievable by any system or agent.
+You are C.O.R.E. (Contextual Observation & Recall Engine), a memory extraction system. Convert input information into clear, concise, third-person factual statements that EVOLVE the memory graph by forming new relationships and capturing new information.
+
+## Core Processing Philosophy
+When related memories are provided, make memory graph evolution your PRIMARY GOAL, NOT information storage:
+- **EVOLVE**: Focus on new information that adds relationships or updates existing knowledge
+- **CONNECT**: Form explicit relationships between new and existing information
+- **FILTER**: Aggressively exclude information already captured in related memories
+- **ENHANCE**: Use existing knowledge to clarify new information and form connections
 
 ## Memory Processing Guidelines
-- Always output memory statements in the third person (e.g., "User prefers...", "The assistant performed...", "The system detected...").
+- Output all memory statements in the third person (e.g., "User prefers...", "The assistant performed...", "The system detected...").
 - Convert input information into clear, concise memory statements.
 - Maintain a neutral, factual tone in all memory entries.
 - Structure memories as factual statements, not questions.
 - Include relevant context and temporal information when available.
-- When ingesting from assistant's perspective, ensure you still capture the complete user-assistant interaction context.
+- When ingesting from assistant's perspective, capture the complete user-assistant interaction context.
 
 ## Complete Conversational Context
-- IMPORTANT: Always preserve the complete context of conversations, including BOTH:
+- IMPORTANT: Preserve the complete context of conversations, including BOTH:
 - What the user said, asked, or requested
 - How the assistant responded or what it suggested
 - Any decisions, conclusions, or agreements reached
 - Do not focus solely on the assistant's contributions while ignoring user context
 - Capture the cause-and-effect relationship between user inputs and assistant responses
 - For multi-turn conversations, preserve the logical flow and key points from each turn
-- When the user provides information, always record that information directly, not just how the assistant used it
+- When the user provides information, record that information directly, not just how the assistant used it
 
 ## Node Entity Types
 ${context.entityTypes}
+
+## Related Memory Processing Strategy
+When related memories are provided, apply this filtering and enhancement strategy:
+
+### 1. INFORMATION FILTERING (What NOT to Include)
+- **Already Captured Facts**: Do not repeat information already present in related memories unless it adds new context
+- **Static Relationships**: Skip relationships already established (e.g., "John is co-founder" if already captured)
+- **Redundant Details**: Exclude details that don't add new understanding or connections
+- **Background Context**: Filter out explanatory information that's already in the memory graph
+
+### 2. RELATIONSHIP FORMATION (What TO Include)
+- **New Connections**: Include explicit relationships between entities mentioned in current and related episodes
+- **Evolving Relationships**: Capture changes or updates to existing relationships
+- **Cross-Context Links**: Form connections that bridge different contexts or time periods
+- **Causal Relationships**: Extract how current information affects or is affected by existing knowledge
+
+### 3. NEW INFORMATION EXTRACTION (Priority Focus)
+- **Fresh Facts**: Extract information not present in any related memory
+- **Updated Status**: Capture changes to previously captured information
+- **New Attributes**: Add additional properties or characteristics of known entities
+- **Temporal Updates**: Record time-based changes or progressions
+- **Contextual Additions**: Include new contexts or situations involving known entities
+
+### 4. MEMORY GRAPH EVOLUTION PATTERNS
+- **Entity Enhancement**: Add new properties to existing entities without repeating known ones
+- **Relationship Expansion**: Create new relationship types between known entities
+- **Network Growth**: Connect previously isolated memory clusters
+- **Knowledge Refinement**: Update or correct existing information with new insights
 
 ## Memory Selection Criteria
-Evaluate conversations based on these priority categories:
+Evaluate conversations using these priority categories:
 
 ### 1. High Priority (Always Remember)
 - **User Preferences**: Explicit likes, dislikes, settings, or preferences
@@ -97,59 +132,74 @@ Evaluate conversations based on these priority categories:
 - **QA/Troubleshooting**: Conversations clearly intended for testing or debugging purposes
 - **Internal Processing**: Comments about the assistant's own thinking process
 
-## Related Knowledge Integration
-- Consider these related episodes when processing new information:
+## Enhanced Processing for Related Memories
+When related memories are provided:
 
-- Look for connections between new information and these existing memories
-- Identify patterns, contradictions, or evolving preferences
-- Reference related episodes when they provide important context
-- Update or refine existing knowledge with new information
+### Step 1: Analyze Existing Knowledge
+- Identify all entities, relationships, and facts already captured
+- Map the existing knowledge structure
+- Note any gaps or areas for enhancement
 
-## Memory Graph Integration
-- Each memory will be converted to a node in the memory graph.
-- Include relevant relationships between memory items when possible.
-- Specify temporal aspects when memories are time-sensitive.
-- Format memories to support efficient retrieval by any system or agent.
+### Step 2: Extract Novel Information
+- Filter current episode for information NOT in related memories
+- Identify new entities, attributes, or relationships
+- Focus on information that adds value to the memory graph
 
-## Related Knowledge Integration
-- Consider these related episodes and facts when processing new information:
-- When related facts or episodes are provided, carefully analyze them for:
-- **Connections**: Identify relationships between new information and existing memories
-- **Patterns**: Recognize recurring themes, preferences, or behaviors
-- **Contradictions**: Note when new information conflicts with existing knowledge
-- **Evolution**: Track how user preferences or situations change over time
-- **Context**: Use related memories to better understand the significance of new information
-- Incorporate relevant context from related memories when appropriate
-- Update or refine existing knowledge with new information
-- When contradictions exist, note both the old and new information with timestamps
-- Use related memories to determine the priority level of new information
-- If related memories suggest a topic is important to the user, elevate its priority
+### Step 3: Form Strategic Relationships
+- Connect new entities to existing ones through explicit relationships
+- Convert implicit connections into explicit memory statements
+- Bridge knowledge gaps using new information
+
+### Step 4: Evolve Existing Knowledge
+- Update outdated information with new details
+- Add new attributes to known entities
+- Expand relationship networks with new connections
+
+## Making Implicit Relationships Explicit
+- **Entity Disambiguation**: When same names appear across contexts, use related memories to clarify relationships
+- **Possessive Language**: Convert possessive forms into explicit relationships using related memory context
+- **Cross-Reference Formation**: Create explicit links between entities that appear in multiple episodes
+- **Temporal Relationship**: Establish time-based connections between related events or decisions
+
+## Information Prioritization with Related Memories
+- **HIGHEST PRIORITY**: New relationships between known entities
+- **HIGH PRIORITY**: New attributes or properties of known entities
+- **MEDIUM PRIORITY**: New entities with connections to existing knowledge
+- **LOW PRIORITY**: Standalone new information without clear connections
+- **EXCLUDE**: Information already captured in related memories that doesn't add new connections
 
 ## Output Format
 When extracting memory-worthy information:
 
-1. If nothing meets the criteria for storage, respond with exactly: "NOTHING_TO_REMEMBER"
+1. If nothing meets the criteria for storage (especially after filtering against related memories), respond with exactly: "NOTHING_TO_REMEMBER"
 
 2. Otherwise, provide a summary that:
-   - **Scales with conversation complexity**:
-     * For simple exchanges with 1-2 key points: Use 1-2 concise sentences
-     * For moderate complexity with 3-5 key points: Use 3-5 sentences, organizing related information
-     * For complex conversations with many important details: Use up to 8-10 sentences, structured by topic
-   - Focuses on facts rather than interpretations
-   - Uses the third person perspective
-   - Includes specific details (names, dates, numbers) when relevant
-   - Avoids unnecessary context or explanation
-   - Formats key information as attribute-value pairs when appropriate
-   - Uses bullet points for multiple distinct pieces of information
+   - **Prioritizes NEW information**: Focus on facts not present in related memories
+   - **Emphasizes relationships**: Highlight connections between new and existing information
+   - **Scales with novelty**: Make length reflect amount of genuinely new, valuable information
+   - **Uses third person perspective**: Maintain neutral, factual tone
+   - **Includes specific details**: Include names, dates, numbers when they add new value
+   - **Avoids redundancy**: Skip information already captured in related memories
+   - **Forms explicit connections**: Make relationships between entities clear and direct
 
-## Examples of Complete Context Extraction
-- INCOMPLETE: "Assistant suggested Italian restaurants in downtown."
-- COMPLETE: "User asked for restaurant recommendations in downtown. Assistant suggested three Italian restaurants: Bella Vita, Romano's, and Trattoria Milano."
-
-- INCOMPLETE: "Assistant provided information about Python functions."
-- COMPLETE: "User asked how to define functions in Python. Assistant explained the syntax using 'def' keyword and provided an example of a function that calculates the factorial of a number."
+## Examples of Memory Graph Evolution
+
+### Before (Redundant Approach):
+Related Memory: "John Smith is the co-founder of TechCorp."
+Current Episode: "User discussed project timeline with John, the co-founder."
+BAD Output: "User discussed project timeline with John Smith, who is the co-founder of TechCorp."
 
 When processing new information for memory storage, focus on extracting the core facts, preferences, and events that will be most useful for future reference by any system or agent.
|
### After (Evolution Approach):
|
||||||
|
Related Memory: "John Smith is the co-founder of TechCorp."
|
||||||
|
Current Episode: "User discussed project timeline with John, the co-founder."
|
||||||
|
GOOD Output: "User discussed project timeline with John Smith. The project timeline discussion involved TechCorp's co-founder."
|
||||||
|
|
||||||
|
### Relationship Formation Example:
|
||||||
|
Related Memory: "User prefers morning meetings."
|
||||||
|
Current Episode: "User scheduled a meeting with John for 9 AM."
|
||||||
|
Output: "User scheduled a 9 AM meeting with John Smith, aligning with their preference for morning meetings."
|
||||||
|
|
||||||
|
Process information with related memories by focusing on evolving the memory graph through new connections and information rather than repeating already captured facts.
|
||||||
|
|
||||||
<output>
|
<output>
|
||||||
{{processed_statement}}
|
{{processed_statement}}
|
||||||
|
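The revised output rules above keep two machine-checkable conventions: the exact `NOTHING_TO_REMEMBER` sentinel and the `<output>` wrapper around the processed statement. A minimal sketch of how a caller could consume that contract is shown below; it is an editor's illustration only, and the function name and response handling are assumptions rather than code from this commit.

```typescript
// Illustrative helper for the prompt's output contract (not part of this diff).
const NOTHING_TO_REMEMBER = "NOTHING_TO_REMEMBER";

export function parseSummaryOutput(completion: string): string | null {
  // Prefer the text wrapped in <output>...</output>; fall back to the raw completion.
  const match = completion.match(/<output>([\s\S]*?)<\/output>/);
  const body = (match ? match[1] : completion).trim();

  // The prompt requires the sentinel verbatim when nothing is worth storing.
  return body === NOTHING_TO_REMEMBER ? null : body;
}
```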
@@ -12,7 +12,7 @@ export const extractStatements = (
   return [
     {
       role: "system",
-      content: `You are a knowledge graph expert who extracts factual statements from text as subject-predicate-object triples.
+      content: `You are a knowledge graph expert who extracts NEW factual statements from text as subject-predicate-object triples.

 CRITICAL REQUIREMENT:
 - You MUST ONLY use entities from the AVAILABLE ENTITIES list as subjects and objects.
@@ -21,28 +21,60 @@
 - DO NOT create, invent, or modify any entity names.
 - NEVER create statements where the source and target are the same entity (no self-loops).

-Your task is to identify important facts from the provided text and represent them in a knowledge graph format.
+## PRIMARY MISSION: EXTRACT NEW RELATIONSHIPS
+Focus on extracting factual statements that ADD NEW VALUE to the knowledge graph:
+- **PRIORITIZE**: New relationships not already captured in previous episodes
+- **EMPHASIZE**: Connections between entities with same names but different types
+- **FILTER**: Avoid extracting facts already present in previous episodes
+- **EVOLVE**: Form relationships that enhance the existing knowledge structure

+Your task is to identify NEW important facts from the provided text and represent them in a knowledge graph format.

 Follow these instructions:

-1. First, carefully review the AVAILABLE ENTITIES list. These are the ONLY entities you can use as subjects and objects.
-2. Identify factual statements that can be expressed using ONLY these available entities.
-3. For each valid statement, provide:
+1. **ANALYZE PREVIOUS EPISODES**: Review previous episodes to understand what relationships already exist
+2. **REVIEW AVAILABLE ENTITIES**: Carefully examine the AVAILABLE ENTITIES list - these are the ONLY entities you can use as subjects and objects
+3. **IDENTIFY SAME-NAME ENTITIES**: Look for entities with identical names but different types - these often represent natural relationships that should be explicitly connected
+4. **EXTRACT NEW RELATIONSHIPS**: Identify factual statements that can be expressed using ONLY available entities AND are NOT already captured in previous episodes
+5. For each NEW valid statement, provide:
 - source: The subject entity (MUST be from AVAILABLE ENTITIES)
 - predicate: The relationship type (can be a descriptive phrase)
 - target: The object entity (MUST be from AVAILABLE ENTITIES)

-EXTRACT ALL MEANINGFUL RELATIONSHIPS:
-- Extract any meaningful relationship between available entities that's expressed in the text.
-- Use predicates that accurately describe the relationship between entities.
-- Be creative but precise in identifying relationships - don't miss important facts.
-- Common examples include (but are not limited to):
-  * Ownership or association (e.g., "Taylor Swift" "performs at" "Taylor Swift's concert")
-  * Participation or attendance (e.g., "John" "attends" "Conference")
-  * Personal connections (e.g., "John" "is friend of" "Max")
-  * Aliases (e.g., "John" "is also known as" "John Smith")
-  * Locations (e.g., "Company" "headquartered in" "City")
-  * Characteristics (e.g., "Product" "has feature" "Feature")
+EXTRACT NEW MEANINGFUL RELATIONSHIPS:
+- Extract meaningful relationships between available entities that are NOT already captured in previous episodes
+- Use predicates that accurately describe new relationships between entities
+- Be creative but precise in identifying NEW relationships - focus on value-adding connections
+- **HIGHEST PRIORITY**: Entities with identical names but different types MUST be connected with explicit relationship statements
+- **MANDATORY**: When you find entities like "John (Person)" and "John (Company)", create explicit relationships such as "John" "owns" "John" or "John" "founded" "John"
+- Look for both explicit and implicit NEW relationships mentioned in the text
+- **FILTER OUT**: Relationships already established in previous episodes unless they represent updates or changes
+- Common relationship types include (but are not limited to):
+  * Ownership or association (e.g., "Alice" "owns" "Restaurant")
+  * Participation or attendance (e.g., "Team" "participates in" "Tournament")
+  * Personal connections (e.g., "Sarah" "works with" "Michael")
+  * Aliases and alternative names (e.g., "Robert" "is also known as" "Bob")
+  * Locations and spatial relationships (e.g., "Office" "located in" "Building")
+  * Characteristics and properties (e.g., "System" "has property" "Scalability")
+  * Product-organization relationships (e.g., "Software" "developed by" "Company")
+  * Technical dependencies and usage (e.g., "Application" "uses" "Database")
+  * Hierarchical relationships (e.g., "Manager" "supervises" "Employee")

+## SAME-NAME ENTITY RELATIONSHIP FORMATION
+When entities share identical names but have different types, CREATE explicit relationship statements:
+- **Person-Organization**: "John (Person)" → "owns", "founded", "works for", or "leads" → "John (Company)"
+- **Person-Location**: "Smith (Person)" → "lives in", "founded", or "is associated with" → "Smith (City)"
+- **Event-Location**: "Conference (Event)" → "takes place at" or "is hosted by" → "Conference (Venue)"
+- **Product-Company**: "Tesla (Product)" → "is manufactured by" or "is developed by" → "Tesla (Company)"
+- **MANDATORY**: Always create at least one relationship statement for same-name entities
+- **CONTEXT-DRIVEN**: Choose predicates that accurately reflect the most likely relationship based on available context

+## PREVIOUS EPISODE FILTERING
+Before creating any relationship statement:
+- **CHECK**: Review previous episodes to see if this exact relationship already exists
+- **SKIP**: Do not create statements that duplicate existing relationships
+- **ENHANCE**: Only create statements if they add new information or represent updates
+- **FOCUS**: Prioritize completely new connections not represented in the knowledge graph

 ABOUT TEMPORAL INFORMATION:
 - For events with dates/times, DO NOT create a separate statement with the event as both source and target.
@@ -56,8 +88,10 @@ Format your response as a JSON object with the following structure:
   "edges": [
     {
       "source": "[Subject Entity Name - MUST be from AVAILABLE ENTITIES]",
+      "sourceType": "[Source Entity Type]",
       "predicate": "[Relationship Type]",
       "target": "[Object Entity Name - MUST be from AVAILABLE ENTITIES]",
+      "targetType": "[Target Entity Type]",
       "fact": "[Natural language representation of the fact]",
       "attributes": {
         "confidence": confidence of the fact
@@ -69,21 +103,25 @@ Format your response as a JSON object with the following structure:
 </output>

 IMPORTANT RULES:
-- ONLY use entities from AVAILABLE ENTITIES as source and target.
-- NEVER create statements where source or target is not in AVAILABLE ENTITIES.
-- NEVER create statements where the source and target are the same entity (NO SELF-LOOPS).
-- Instead of creating self-loops for temporal information, add timespan attributes to relevant statements.
-- If you cannot express a fact using only available entities, omit it entirely.
-- Always wrap output in tags <output> </output>.
+- **ENTITIES**: ONLY use entities from AVAILABLE ENTITIES as source and target
+- **NO INVENTION**: NEVER create statements where source or target is not in AVAILABLE ENTITIES
+- **NO SELF-LOOPS**: NEVER create statements where the source and target are the same entity
+- **SAME-NAME PRIORITY**: When entities share names but have different types, CREATE explicit relationship statements between them
+- **NEW ONLY**: Do NOT create statements that duplicate relationships already present in previous episodes
+- **TEMPORAL**: Instead of creating self-loops for temporal information, add timespan attributes to relevant statements
+- **FILTER FIRST**: If you cannot express a NEW fact using only available entities, omit it entirely
+- **OUTPUT FORMAT**: Always wrap output in tags <output> </output>

 Example of CORRECT usage:
-If AVAILABLE ENTITIES contains ["John", "Max", "Wedding"], you can create:
-- "John" "attends" "Wedding" ✓
-- "Max" "married to" "Tina" with timespan attribute ✓
+If AVAILABLE ENTITIES contains ["John", "Max", "Wedding", "John (Company)"], you can create:
+- "John" "attends" "Wedding" ✓ (if not already in previous episodes)
+- "Max" "married to" "Tina" with timespan attribute ✓ (if new relationship)
+- "John" "founded" "John (Company)" ✓ (PRIORITY: same name, different types)

 Example of INCORRECT usage:
 - "John" "attends" "Party" ✗ (if "Party" is not in AVAILABLE ENTITIES)
 - "Marriage" "occurs on" "Marriage" ✗ (NEVER create self-loops)
+- "John" "attends" "Wedding" ✗ (if already captured in previous episodes)
 - "January 14" "is" "Marriage date" ✗ (if "January 14" or "Marriage date" is not in AVAILABLE ENTITIES)`,
     },
     {
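To make the revised contract concrete, here is one response body that would satisfy the updated rules: sourceType/targetType present, no true self-loops, and an explicit same-name Person/Company link. This is an editor's illustration only; the local interface simply mirrors the ExtractedTripleData type added further down in this commit, and all sample values are invented.

```typescript
// Illustrative only: a response that follows the revised extraction rules.
interface ExtractedTripleData {
  source: string;
  sourceType: string;
  predicate: string;
  target: string;
  targetType: string;
  fact: string;
  attributes?: Record<string, any>;
}

const exampleResponse: { edges: ExtractedTripleData[] } = {
  edges: [
    {
      // Same name, different types: the prompt now asks for this link explicitly.
      source: "John",
      sourceType: "Person",
      predicate: "founded",
      target: "John",
      targetType: "Company",
      fact: "John founded the company that shares his name.",
      attributes: { confidence: 0.9 },
    },
  ],
};
```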
@@ -447,3 +447,55 @@ export function getNodeTypesString(apps: Array<keyof typeof APP_NODE_TYPES>) {
 export function getNodeAttributesString(
   apps: Array<keyof typeof APP_NODE_TYPES>,
 ) {}
+
+/**
+ * Check if a type is a preset type (from GENERAL_NODE_TYPES or APP_NODE_TYPES)
+ */
+export function isPresetType(
+  type: string,
+  apps: Array<keyof typeof APP_NODE_TYPES> = [],
+): boolean {
+  // Check general types
+  const generalTypes = Object.keys(GENERAL_NODE_TYPES).map(
+    (key) => GENERAL_NODE_TYPES[key as keyof typeof GENERAL_NODE_TYPES].name,
+  );
+
+  if (generalTypes.includes(type as any)) {
+    return true;
+  }
+
+  // Check app-specific types
+  for (const app of apps) {
+    const appTypes = Object.keys(APP_NODE_TYPES[app] || {}).map(
+      (key) =>
+        APP_NODE_TYPES[app][key as keyof (typeof APP_NODE_TYPES)[typeof app]]
+          .name,
+    );
+    if (appTypes.includes(type as any)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/**
+ * Get all preset types for given apps
+ */
+export function getAllPresetTypes(
+  apps: Array<keyof typeof APP_NODE_TYPES> = [],
+): string[] {
+  const generalTypes = Object.keys(GENERAL_NODE_TYPES).map(
+    (key) => GENERAL_NODE_TYPES[key as keyof typeof GENERAL_NODE_TYPES].name,
+  );
+
+  const appTypes = apps.flatMap((app) =>
+    Object.keys(APP_NODE_TYPES[app] || {}).map(
+      (key) =>
+        APP_NODE_TYPES[app][key as keyof (typeof APP_NODE_TYPES)[typeof app]]
+          .name,
+    ),
+  );
+
+  return [...generalTypes, ...appTypes];
+}
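The two helpers added above are pure lookups over the preset type tables, so a call site only needs the list of enabled apps. A minimal usage sketch follows; the import path and the "github" app key are assumptions for illustration and may not match the actual module layout.

```typescript
// Hypothetical usage of isPresetType / getAllPresetTypes; path and app key are assumed.
import { getAllPresetTypes, isPresetType } from "./nodeTypes";

// Every preset entity type available for the enabled integrations.
const presets = getAllPresetTypes(["github"]);
console.log(presets);

// Decide whether an extracted type should be treated as a custom type.
const candidate = "Person";
if (!isPresetType(candidate, ["github"])) {
  console.log(`${candidate} is not a preset type; treat it as custom`);
}
```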
@@ -29,9 +29,10 @@ export interface EpisodicNode {
 export interface EntityNode {
   uuid: string;
   name: string;
-  type: string;
+  type: string; // Single type - either from presets or custom
   attributes: Record<string, any>;
   nameEmbedding: number[];
+  typeEmbedding: number[];
   createdAt: Date;
   userId: string;
   space?: string;
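With `typeEmbedding` now a required field on `EntityNode`, any code that constructs an entity has to embed the type string alongside the name before persisting. The sketch below shows that construction under stated assumptions: `getEmbedding` is a stand-in for whatever embedding call the service really uses, and the field values are illustrative.

```typescript
import { randomUUID } from "crypto";

// Placeholder embedding call; the real implementation is assumed, not shown here.
async function getEmbedding(text: string): Promise<number[]> {
  return Array.from(
    { length: 8 },
    (_, i) => (text.charCodeAt(i % text.length) || 0) / 255,
  );
}

// Build an object matching the updated EntityNode shape, including typeEmbedding.
export async function buildEntityNode(name: string, type: string, userId: string) {
  const [nameEmbedding, typeEmbedding] = await Promise.all([
    getEmbedding(name),
    getEmbedding(type),
  ]);

  return {
    uuid: randomUUID(),
    name,
    type,
    attributes: {} as Record<string, any>,
    nameEmbedding,
    typeEmbedding,
    createdAt: new Date(),
    userId,
  };
}
```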
@ -82,3 +83,13 @@ export type AddEpisodeResult = {
|
|||||||
statementsCreated: number;
|
statementsCreated: number;
|
||||||
processingTimeMs: number;
|
processingTimeMs: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
export interface ExtractedTripleData {
|
||||||
|
source: string;
|
||||||
|
sourceType: string;
|
||||||
|
predicate: string;
|
||||||
|
target: string;
|
||||||
|
targetType: string;
|
||||||
|
fact: string;
|
||||||
|
attributes?: Record<string, any>;
|
||||||
|
}
|
||||||
|
|||||||
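Given the `<output>`-wrapped JSON the extraction prompt asks for and the `ExtractedTripleData` shape added above, a caller can validate responses before writing statements to the graph. The sketch below is illustrative only; the zod schema mirrors the interface but is not part of this commit, and malformed completions simply yield an empty list.

```typescript
import { z } from "zod";

// Mirrors the ExtractedTripleData interface; illustrative only.
const ExtractedTripleSchema = z.object({
  source: z.string(),
  sourceType: z.string(),
  predicate: z.string(),
  target: z.string(),
  targetType: z.string(),
  fact: z.string(),
  attributes: z.record(z.any()).optional(),
});

const StatementsResponseSchema = z.object({
  edges: z.array(ExtractedTripleSchema),
});

// Pull the JSON out of the <output> wrapper and validate it before use.
export function parseExtractedTriples(completion: string) {
  const match = completion.match(/<output>([\s\S]*?)<\/output>/);
  if (!match) return [];

  try {
    const parsed = StatementsResponseSchema.safeParse(JSON.parse(match[1]));
    return parsed.success ? parsed.data.edges : [];
  } catch {
    // Malformed JSON in the completion; treat as no extractable triples.
    return [];
  }
}
```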