Feat: Delete episode nodes

Enhance: knowledge graphs with implicit relationships

- Added a new API route for deleting episodes, including related statements and entities.
- Introduced error handling for unauthorized access and non-existent episodes.
- Enhanced the KnowledgeGraphService with methods for resolving entities and managing relationships during deletions.
- Updated entity and episode models to support new deletion logic and ensure data integrity.
Manoj K 2025-07-10 21:23:47 +05:30 committed by Harshith Mullapudi
parent 50c4e2bcce
commit 28803bec17
10 changed files with 813 additions and 126 deletions

View File

@ -0,0 +1,60 @@
import { z } from "zod";
import { json } from "@remix-run/node";
import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server";
import { deleteEpisodeWithRelatedNodes } from "~/services/graphModels/episode";
export const DeleteEpisodeBodyRequest = z.object({
episodeUuid: z.string().uuid("Episode UUID must be a valid UUID"),
});
const { action, loader } = createActionApiRoute(
{
body: DeleteEpisodeBodyRequest,
allowJWT: true,
method: "DELETE",
authorization: {
action: "delete",
},
corsStrategy: "all",
},
async ({ body, authentication }) => {
try {
const result = await deleteEpisodeWithRelatedNodes({
episodeUuid: body.episodeUuid,
userId: authentication.userId,
});
if (!result.episodeDeleted) {
return json(
{
error: "Episode not found or unauthorized",
code: "not_found"
},
{ status: 404 }
);
}
return json({
success: true,
message: "Episode deleted successfully",
deleted: {
episode: result.episodeDeleted,
statements: result.statementsDeleted,
entities: result.entitiesDeleted,
facts: result.factsDeleted,
},
});
} catch (error) {
console.error("Error deleting episode:", error);
return json(
{
error: "Failed to delete episode",
code: "internal_error"
},
{ status: 500 }
);
}
},
);
export { action, loader };
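A minimal client-side sketch of calling this route is shown below. The route path, token value, and UUID are placeholders for illustration only; the actual path depends on where this route file lives, which the diff does not show.

// Hypothetical usage sketch - the path and token are assumptions, not from this diff.
const apiToken = "<jwt-or-api-token>";
const response = await fetch("/api/v1/episodes/delete", {
  method: "DELETE",
  headers: {
    "Content-Type": "application/json",
    Authorization: `Bearer ${apiToken}`, // allowJWT: true accepts a JWT here
  },
  body: JSON.stringify({
    episodeUuid: "9b2f0c2e-5d8a-4c1d-9f3a-1234567890ab",
  }),
});
// 200 -> { success, message, deleted: { episode, statements, entities, facts } }
// 404 -> { error: "Episode not found or unauthorized", code: "not_found" }
const result = await response.json();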

View File

@ -2,6 +2,19 @@ import type { EntityNode } from "@core/types";
import { runQuery } from "~/lib/neo4j.server";
export async function saveEntity(entity: EntityNode): Promise<string> {
// Debug: Log entity to identify missing typeEmbedding
if (!entity.typeEmbedding) {
console.error(`Entity missing typeEmbedding:`, {
uuid: entity.uuid,
name: entity.name,
type: entity.type,
hasNameEmbedding: !!entity.nameEmbedding,
});
throw new Error(
`Entity ${entity.name} (${entity.type}) is missing typeEmbedding`,
);
}
const query = `
MERGE (n:Entity {uuid: $uuid})
ON CREATE SET
@ -9,6 +22,7 @@ export async function saveEntity(entity: EntityNode): Promise<string> {
n.type = $type,
n.attributes = $attributes,
n.nameEmbedding = $nameEmbedding,
n.typeEmbedding = $typeEmbedding,
n.createdAt = $createdAt,
n.userId = $userId,
n.space = $space
@ -17,6 +31,7 @@ export async function saveEntity(entity: EntityNode): Promise<string> {
n.type = $type,
n.attributes = $attributes,
n.nameEmbedding = $nameEmbedding,
n.typeEmbedding = $typeEmbedding,
n.space = $space
RETURN n.uuid as uuid
`;
@ -27,6 +42,7 @@ export async function saveEntity(entity: EntityNode): Promise<string> {
type: entity.type,
attributes: JSON.stringify(entity.attributes || {}),
nameEmbedding: entity.nameEmbedding,
typeEmbedding: entity.typeEmbedding,
createdAt: entity.createdAt.toISOString(),
userId: entity.userId,
space: entity.space || null,
@ -52,6 +68,7 @@ export async function getEntity(uuid: string): Promise<EntityNode | null> {
type: entity.type,
attributes: JSON.parse(entity.attributes || "{}"),
nameEmbedding: entity.nameEmbedding,
typeEmbedding: entity.typeEmbedding,
createdAt: new Date(entity.createdAt),
userId: entity.userId,
space: entity.space,
@ -85,6 +102,43 @@ export async function findSimilarEntities(params: {
type: entity.type,
attributes: JSON.parse(entity.attributes || "{}"),
nameEmbedding: entity.nameEmbedding,
typeEmbedding: entity.typeEmbedding,
createdAt: new Date(entity.createdAt),
userId: entity.userId,
space: entity.space,
};
});
}
export async function findSimilarEntitiesWithSameType(params: {
queryEmbedding: number[];
entityType: string;
limit: number;
threshold: number;
userId: string;
}): Promise<EntityNode[]> {
const query = `
MATCH (entity:Entity)
WHERE entity.nameEmbedding IS NOT NULL
WITH entity, vector.similarity.cosine($queryEmbedding, entity.nameEmbedding) AS score
WHERE score >= $threshold
AND entity.userId = $userId
AND entity.type = $entityType
RETURN entity, score
ORDER BY score DESC
LIMIT toInteger($limit)
`;
const result = await runQuery(query, params);
return result.map((record) => {
const entity = record.get("entity").properties;
return {
uuid: entity.uuid,
name: entity.name,
type: entity.type,
attributes: JSON.parse(entity.attributes || "{}"),
nameEmbedding: entity.nameEmbedding,
typeEmbedding: entity.typeEmbedding,
createdAt: new Date(entity.createdAt),
userId: entity.userId,
space: entity.space,
@ -115,9 +169,89 @@ export async function findExactPredicateMatches(params: {
type: entity.type,
attributes: JSON.parse(entity.attributes || "{}"),
nameEmbedding: entity.nameEmbedding,
typeEmbedding: entity.typeEmbedding,
createdAt: new Date(entity.createdAt),
userId: entity.userId,
space: entity.space,
};
});
}
/**
* Replace entity references in all statements with a new entity
* Updates all statements where the old entity appears as subject, predicate, or object
*/
export async function replaceEntityReferences(
evolvedEntity: EntityNode,
oldEntityUUIDs: string[],
): Promise<void> {
// Save the new entity first to ensure it exists in the database
await saveEntity(evolvedEntity);
// Then update all references from the old entities to the new entity,
// awaiting each update so the caller can rely on completion
for (const oldEntityUUID of oldEntityUUIDs) {
await updateStatementsWithNewEntity(oldEntityUUID, evolvedEntity.uuid);
}
}
/**
* Update all statements that reference an old entity to use the new entity
* This includes updating subject, predicate, and object relationships
*/
export async function updateStatementsWithNewEntity(
oldEntityUUID: string,
newEntityUUID: string,
): Promise<void> {
const queries = [
// Update statements where the old entity is the subject
`
MATCH (oldEntity:Entity {uuid: $oldEntityUUID})-[r:SUBJECT]->(statement:Statement)
MATCH (newEntity:Entity {uuid: $newEntityUUID})
DELETE r
CREATE (newEntity)-[:SUBJECT]->(statement)
`,
// Update statements where the old entity is the predicate
`
MATCH (oldEntity:Entity {uuid: $oldEntityUUID})-[r:PREDICATE]->(statement:Statement)
MATCH (newEntity:Entity {uuid: $newEntityUUID})
DELETE r
CREATE (newEntity)-[:PREDICATE]->(statement)
`,
// Update statements where the old entity is the object
`
MATCH (oldEntity:Entity {uuid: $oldEntityUUID})-[r:OBJECT]->(statement:Statement)
MATCH (newEntity:Entity {uuid: $newEntityUUID})
DELETE r
CREATE (newEntity)-[:OBJECT]->(statement)
`,
];
const params = {
oldEntityUUID,
newEntityUUID,
};
// Execute all update queries
for (const query of queries) {
await runQuery(query, params);
}
// Optional: Delete the old entity if no longer referenced
await deleteEntityIfUnreferenced(oldEntityUUID);
}
/**
* Delete an entity if it's no longer referenced by any statements
*/
async function deleteEntityIfUnreferenced(entityUUID: string): Promise<void> {
const checkQuery = `
MATCH (entity:Entity {uuid: $entityUUID})
OPTIONAL MATCH (entity)-[r]-()
WITH entity, count(r) as relationshipCount
WHERE relationshipCount = 0
DELETE entity
RETURN count(entity) as deletedCount
`;
await runQuery(checkQuery, { entityUUID });
}
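As a rough usage sketch of the new reference-replacement helper: the caller builds the evolved entity and passes the old entity UUIDs, and replaceEntityReferences saves the new node, repoints statement relationships, and cleans up any now-unreferenced old entities. All concrete values below are invented, and the entity-module import path and embedding helper signature are assumptions.

import type { EntityNode } from "@core/types";
import { getEmbedding } from "~/lib/model.server"; // embedding helper, signature assumed
import { replaceEntityReferences } from "~/services/graphModels/entity"; // path assumed

// Hypothetical example: evolve a custom-typed "TechCorp" entity into the preset
// "Company" type and repoint existing statement references to the new node.
async function evolveTechCorp(userId: string, oldEntityUuids: string[]) {
  const evolvedEntity: EntityNode = {
    uuid: crypto.randomUUID(),
    name: "TechCorp",
    type: "Company",
    attributes: {},
    nameEmbedding: await getEmbedding("TechCorp"),
    typeEmbedding: await getEmbedding("Company"),
    createdAt: new Date(),
    userId,
  };
  await replaceEntityReferences(evolvedEntity, oldEntityUuids);
}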

View File

@ -1,5 +1,5 @@
import { runQuery } from "~/lib/neo4j.server";
import type { EpisodicNode } from "@core/types";
import type { EntityNode, EpisodicNode } from "@core/types";
export async function saveEpisode(episode: EpisodicNode): Promise<string> {
const query = `
@ -178,3 +178,134 @@ export async function searchEpisodesByEmbedding(params: {
};
});
}
// Delete episode and its related nodes safely
export async function deleteEpisodeWithRelatedNodes(params: {
episodeUuid: string;
userId: string;
}): Promise<{
episodeDeleted: boolean;
statementsDeleted: number;
entitiesDeleted: number;
factsDeleted: number;
}> {
// Step 1: Check if episode exists
const episodeCheck = await runQuery(
`MATCH (e:Episode {uuid: $episodeUuid, userId: $userId}) RETURN e`,
{ episodeUuid: params.episodeUuid, userId: params.userId },
);
if (!episodeCheck || episodeCheck.length === 0) {
return {
episodeDeleted: false,
statementsDeleted: 0,
entitiesDeleted: 0,
factsDeleted: 0,
};
}
// Step 2: Find statements that are ONLY connected to this episode
const statementsToDelete = await runQuery(
`
MATCH (episode:Episode {uuid: $episodeUuid, userId: $userId})-[:HAS_PROVENANCE]->(stmt:Statement)
WHERE NOT EXISTS {
MATCH (otherEpisode:Episode)-[:HAS_PROVENANCE]->(stmt)
WHERE otherEpisode.uuid <> $episodeUuid AND otherEpisode.userId = $userId
}
RETURN stmt.uuid as statementUuid
`,
{ episodeUuid: params.episodeUuid, userId: params.userId },
);
const statementUuids = statementsToDelete.map((r) => r.get("statementUuid"));
// Step 3: Find entities that are ONLY connected to statements we're deleting
const entitiesToDelete = await runQuery(
`
MATCH (stmt:Statement)-[r:HAS_SUBJECT|HAS_PREDICATE|HAS_OBJECT]->(entity:Entity)
WHERE stmt.uuid IN $statementUuids AND stmt.userId = $userId
AND NOT EXISTS {
MATCH (otherStmt:Statement)-[:HAS_SUBJECT|HAS_PREDICATE|HAS_OBJECT]->(entity)
WHERE otherStmt.userId = $userId AND NOT otherStmt.uuid IN $statementUuids
}
RETURN DISTINCT entity.uuid as entityUuid
`,
{ statementUuids, userId: params.userId },
);
const entityUuids = entitiesToDelete.map((r) => r.get("entityUuid"));
// Step 4: Delete statements
if (statementUuids.length > 0) {
await runQuery(
`
MATCH (stmt:Statement {userId: $userId})
WHERE stmt.uuid IN $statementUuids
DETACH DELETE stmt
`,
{ statementUuids, userId: params.userId },
);
}
// Step 5: Delete orphaned entities
if (entityUuids.length > 0) {
await runQuery(
`
MATCH (entity:Entity {userId: $userId})
WHERE entity.uuid IN $entityUuids
DETACH DELETE entity
`,
{ entityUuids, userId: params.userId },
);
}
// Step 6: Delete the episode
await runQuery(
`
MATCH (episode:Episode {uuid: $episodeUuid, userId: $userId})
DETACH DELETE episode
`,
{ episodeUuid: params.episodeUuid, userId: params.userId },
);
return {
episodeDeleted: true,
statementsDeleted: statementUuids.length,
entitiesDeleted: entityUuids.length,
factsDeleted: statementUuids.length,
};
}
export async function getRelatedEpisodesEntities(params: {
embedding: number[];
userId: string;
limit?: number;
minSimilarity?: number;
}) {
const query = `
MATCH (episode:Episode {userId: $userId})
WHERE episode.contentEmbedding IS NOT NULL
WITH episode,
CASE
WHEN size(episode.contentEmbedding) = size($embedding)
THEN vector.similarity.cosine($embedding, episode.contentEmbedding)
ELSE 0
END AS score
WHERE score >= $minSimilarity
OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity)
WHERE entity IS NOT NULL
RETURN DISTINCT entity`;
const result = await runQuery(query, {
embedding: params.embedding,
// Default the optional threshold so the query parameter is always defined
minSimilarity: params.minSimilarity ?? 0.7,
userId: params.userId,
});
return result
.map((record) => {
const entity = record.get("entity");
return entity ? (entity.properties as EntityNode) : null;
})
.filter((entity): entity is EntityNode => entity !== null);
}

View File

@ -243,6 +243,7 @@ export async function getTripleForStatement({
name: subjectProps.name,
type: subjectProps.type,
nameEmbedding: subjectProps.nameEmbedding,
typeEmbedding: subjectProps.typeEmbedding,
attributes: subjectProps.attributesJson
? JSON.parse(subjectProps.attributesJson)
: {},
@ -255,6 +256,7 @@ export async function getTripleForStatement({
name: predicateProps.name,
type: predicateProps.type,
nameEmbedding: predicateProps.nameEmbedding,
typeEmbedding: predicateProps.typeEmbedding,
attributes: predicateProps.attributesJson
? JSON.parse(predicateProps.attributesJson)
: {},
@ -267,6 +269,7 @@ export async function getTripleForStatement({
name: objectProps.name,
type: objectProps.type,
nameEmbedding: objectProps.nameEmbedding,
typeEmbedding: objectProps.typeEmbedding,
attributes: objectProps.attributesJson
? JSON.parse(objectProps.attributesJson)
: {},

View File

@ -1,5 +1,6 @@
import { type CoreMessage } from "ai";
import {
type ExtractedTripleData,
type AddEpisodeParams,
type EntityNode,
type EpisodicNode,
@ -20,11 +21,14 @@ import {
} from "./prompts/statements";
import {
getRecentEpisodes,
getRelatedEpisodesEntities,
searchEpisodesByEmbedding,
} from "./graphModels/episode";
import {
findExactPredicateMatches,
findSimilarEntities,
findSimilarEntitiesWithSameType,
replaceEntityReferences,
} from "./graphModels/entity";
import {
findContradictoryStatements,
@ -35,7 +39,13 @@ import {
searchStatementsByEmbedding,
} from "./graphModels/statement";
import { getEmbedding, makeModelCall } from "~/lib/model.server";
import { Apps, getNodeTypes, getNodeTypesString } from "~/utils/presets/nodes";
import {
Apps,
getNodeTypes,
getNodeTypesString,
isPresetType,
getAllPresetTypes,
} from "~/utils/presets/nodes";
import { normalizePrompt } from "./prompts";
// Default number of previous episodes to retrieve for context
@ -72,6 +82,12 @@ export class KnowledgeGraphService {
params.userId,
);
const relatedEpisodesEntities = await getRelatedEpisodesEntities({
embedding: await this.getEmbedding(normalizedEpisodeBody),
userId: params.userId,
minSimilarity: 0.7,
});
if (normalizedEpisodeBody === "NOTHING_TO_REMEMBER") {
logger.log("Nothing to remember");
return;
@ -99,10 +115,20 @@ export class KnowledgeGraphService {
previousEpisodes,
);
// Step 3.1: Context-aware entity resolution with preset type evolution
await this.resolveEntitiesWithContext(
extractedNodes,
relatedEpisodesEntities,
);
// Step 3.2: Handle preset type logic - expand entities for statement extraction
const entitiesForStatementExtraction =
await this.expandEntitiesForStatements(extractedNodes, episode);
// Step 4: Statement Extraction - Extract statements (triples) instead of direct edges
const extractedStatements = await this.extractStatements(
episode,
extractedNodes,
entitiesForStatementExtraction,
previousEpisodes,
);
@ -126,9 +152,21 @@ export class KnowledgeGraphService {
for (const triple of updatedTriples) {
const { subject, predicate, object, statement, provenance } = triple;
const safeTriple = {
subject: { ...subject, nameEmbedding: undefined },
predicate: { ...predicate, nameEmbedding: undefined },
object: { ...object, nameEmbedding: undefined },
subject: {
...subject,
nameEmbedding: undefined,
typeEmbedding: undefined,
},
predicate: {
...predicate,
nameEmbedding: undefined,
typeEmbedding: undefined,
},
object: {
...object,
nameEmbedding: undefined,
typeEmbedding: undefined,
},
statement: { ...statement, factEmbedding: undefined },
provenance: { ...provenance, contentEmbedding: undefined },
};
@ -206,9 +244,8 @@ export class KnowledgeGraphService {
name: entity.name,
type: entity.type,
attributes: entity.attributes || {},
nameEmbedding: await this.getEmbedding(
`${entity.type}: ${entity.name}`,
),
nameEmbedding: await this.getEmbedding(entity.name),
typeEmbedding: await this.getEmbedding(entity.type),
createdAt: new Date(),
userId: episode.userId,
})),
@ -257,7 +294,8 @@ export class KnowledgeGraphService {
}
// Parse the statements from the LLM response
const extractedTriples = JSON.parse(responseText || "{}").edges || [];
const extractedTriples: ExtractedTripleData[] =
JSON.parse(responseText || "{}").edges || [];
// Create maps to deduplicate entities by name within this extraction
const predicateMap = new Map<string, EntityNode>();
@ -272,9 +310,8 @@ export class KnowledgeGraphService {
name: triple.predicate,
type: "Predicate",
attributes: {},
nameEmbedding: await this.getEmbedding(
`Predicate: ${triple.predicate}`,
),
nameEmbedding: await this.getEmbedding(triple.predicate),
typeEmbedding: await this.getEmbedding("Predicate"),
createdAt: new Date(),
userId: episode.userId,
};
@ -284,15 +321,18 @@ export class KnowledgeGraphService {
// Convert extracted triples to Triple objects with Statement nodes
const triples = await Promise.all(
// Fix: Type 'any'.
extractedTriples.map(async (triple: any) => {
// Find the subject and object nodes
extractedTriples.map(async (triple: ExtractedTripleData) => {
// Find the subject and object nodes by matching both name and type
const subjectNode = extractedEntities.find(
(node) => node.name.toLowerCase() === triple.source.toLowerCase(),
(node) =>
node.name.toLowerCase() === triple.source.toLowerCase() &&
node.type.toLowerCase() === triple.sourceType.toLowerCase(),
);
const objectNode = extractedEntities.find(
(node) => node.name.toLowerCase() === triple.target.toLowerCase(),
(node) =>
node.name.toLowerCase() === triple.target.toLowerCase() &&
node.type.toLowerCase() === triple.targetType.toLowerCase(),
);
// Get the deduplicated predicate node
@ -327,6 +367,89 @@ export class KnowledgeGraphService {
return triples.filter(Boolean) as Triple[];
}
/**
* Expand entities for statement extraction by adding existing preset entities
*/
private async expandEntitiesForStatements(
extractedNodes: EntityNode[],
episode: EpisodicNode,
): Promise<EntityNode[]> {
const allAppEnumValues = Object.values(Apps);
const expandedEntities = [...extractedNodes];
// For each extracted entity, check if we need to add existing preset entities
for (const entity of extractedNodes) {
const newIsPreset = isPresetType(entity.type, allAppEnumValues);
// Find similar entities with same name
const similarEntities = await findSimilarEntities({
queryEmbedding: entity.nameEmbedding,
limit: 5,
threshold: 0.7,
userId: episode.userId,
});
for (const existingEntity of similarEntities) {
const existingIsPreset = isPresetType(
existingEntity.type,
allAppEnumValues,
);
// If both are preset types, include both for statement extraction
if (newIsPreset && existingIsPreset) {
// Add the existing entity to the list if not already present
if (!expandedEntities.some((e) => e.uuid === existingEntity.uuid)) {
expandedEntities.push(existingEntity);
}
}
}
}
return expandedEntities;
}
/**
* Resolve entities with context-aware deduplication and preset type evolution
* Only merges entities that appear in semantically related episodes
*/
private async resolveEntitiesWithContext(
extractedNodes: EntityNode[],
relatedEpisodesEntities: EntityNode[],
): Promise<void> {
const allAppEnumValues = Object.values(Apps);
// Await every resolution so entity evolution finishes before processing continues
await Promise.all(
extractedNodes.map(async (newEntity) => {
// Find same-name entities in related episodes (contextually relevant)
const sameNameInContext = relatedEpisodesEntities.filter(
(existing) =>
existing.name.toLowerCase() === newEntity.name.toLowerCase(),
);
if (sameNameInContext.length > 0) {
const existingEntityIds: string[] = [];
for (const existingEntity of sameNameInContext) {
const newIsPreset = isPresetType(newEntity.type, allAppEnumValues);
const existingIsPreset = isPresetType(
existingEntity.type,
allAppEnumValues,
);
if (newIsPreset && !existingIsPreset) {
// New is preset, existing is custom - evolve existing entity to preset type
console.log(
`Evolving entity: ${existingEntity.name} from ${existingEntity.type} to ${newEntity.type}`,
);
existingEntityIds.push(existingEntity.uuid);
}
}
if (existingEntityIds.length > 0) {
await replaceEntityReferences(newEntity, existingEntityIds);
}
}
}),
);
}
/**
* Resolve extracted nodes to existing nodes or create new ones
*/
@ -398,8 +521,9 @@ export class KnowledgeGraphService {
// Step 2a: Find similar entities for non-predicate entities
const similarEntitiesResults = await Promise.all(
nonPredicates.map(async (entity) => {
const similarEntities = await findSimilarEntities({
const similarEntities = await findSimilarEntitiesWithSameType({
queryEmbedding: entity.nameEmbedding,
entityType: entity.type,
limit: 5,
threshold: 0.7,
userId: episode.userId,
@ -437,11 +561,6 @@ export class KnowledgeGraphService {
...exactPredicateResults,
];
// If no similar entities found for any entity, return original triples
if (allEntityResults.length === 0) {
return triples;
}
// Step 3: Prepare context for LLM deduplication
const dedupeContext = {
extracted_nodes: allEntityResults.map((result, index) => ({
@ -451,7 +570,7 @@ export class KnowledgeGraphService {
duplication_candidates: result.similarEntities.map((candidate, j) => ({
idx: j,
name: candidate.name,
entity_types: candidate.type,
entity_type: candidate.type,
})),
})),
episode_content: episode ? episode.content : "",

View File

@ -26,20 +26,37 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- For pronouns that refer to named entities, extract them as separate Alias entities.
2. **Entity Classification**:
- CRITICAL: You MUST ONLY use entity types provided in the ENTITY_TYPES section.
- Use the descriptions in ENTITY TYPES to classify each extracted entity.
- Assign the appropriate type for each one.
- Classify pronouns (I, me, you, etc.) as "ALIAS" entities.
- DO NOT invent new entity types that are not in the ENTITY_TYPES section.
- Prefer using appropriate types from the ENTITY_TYPES section when they fit naturally.
- DO NOT force-fit entities into inappropriate types from ENTITY_TYPES.
- If no type from ENTITY_TYPES fits naturally, create a descriptive type based on context (e.g., "memory_graph_system", "authentication_bug").
- Each entity should have exactly ONE type that best describes what it is.
- Classify pronouns (I, me, you, etc.) as "Alias" entities.
3. **Exclusions**:
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
- Do NOT extract dates, times, or other temporal information; these will be handled separately.
4. **Formatting**:
- Be **explicit and unambiguous** in naming entities (e.g., use full names when available).
- For pronouns, use the exact form as they appear in the text (e.g., "I", "me", "you").
4. **Entity Name Extraction**:
- Extract ONLY the core entity name, WITHOUT any type descriptors or qualifiers
- When text mentions "Tesla car", extract name as "Tesla" with type "Vehicle"
- When text mentions "John's company", extract name as "John" with type "Person" (company is a separate entity)
- **CLEAN NAMES**: Remove type words like "app", "system", "platform", "tool", "service", "company", "organization" from the entity name
- **PRONOUNS**: Use exact form as they appear (e.g., "I", "me", "you") and classify as "Alias"
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
- **NO TYPE SUFFIXES**: Never append the entity type to the entity name
## Examples of Correct Entity Extraction:
**CORRECT Examples:**
- Text: "Tesla car" Name: "Tesla", Type: "Vehicle"
- Text: "Google's search engine" Name: "Google", Type: "Company" + Name: "Search Engine", Type: "Product"
- Text: "Microsoft Office suite" Name: "Microsoft Office", Type: "Software"
- Text: "John's startup company" Name: "John", Type: "Person" + Name: "Startup", Type: "Company"
**INCORRECT Examples:**
- Text: "Tesla car" Name: "Tesla car", Type: "Vehicle"
- Text: "authentication system" Name: "authentication system", Type: "System"
- Text: "payment service" Name: "payment service", Type: "Service"
Format your response as a JSON object with the following structure:
<output>
@ -98,17 +115,37 @@ You are given a TEXT. Your task is to extract **entity nodes** mentioned **expli
- For pronouns that refer to named entities, extract them as separate Alias entities.
2. **Entity Classification**:
- Use the descriptions in ENTITY TYPES to classify each extracted entity.
- Assign the appropriate type for each one.
- Classify pronouns (I, me, you, etc.) as Alias entities.
- Prefer using appropriate types from the ENTITY_TYPES section when they fit naturally.
- DO NOT force-fit entities into inappropriate types from ENTITY_TYPES.
- If no type from ENTITY_TYPES fits naturally, create a descriptive type based on context.
- Each entity should have exactly ONE type that best describes what it is.
- Classify pronouns (I, me, you, etc.) as "Alias" entities.
3. **Exclusions**:
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
- Do NOT extract dates, times, or other temporal information; these will be handled separately.
4. **Formatting**:
- Be **explicit and unambiguous** when naming entities (e.g., use full names when available).
- For pronouns, use the exact form as they appear in the text (e.g., "I", "me", "you").
4. **Entity Name Extraction**:
- Extract ONLY the core entity name, WITHOUT any type descriptors or qualifiers
- When text mentions "Tesla car", extract name as "Tesla" with type "Vehicle"
- When text mentions "John's company", extract name as "John" with type "Person" (company is a separate entity)
- **CLEAN NAMES**: Remove type words like "app", "system", "platform", "tool", "service", "company", "organization" from the entity name
- **PRONOUNS**: Use exact form as they appear (e.g., "I", "me", "you") and classify as "Alias"
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
- **NO TYPE SUFFIXES**: Never append the entity type to the entity name
## Examples of Correct Entity Extraction:
**CORRECT Examples:**
- Text: "Tesla car" Name: "Tesla", Type: "Vehicle"
- Text: "Google's search engine" Name: "Google", Type: "Company" + Name: "Search Engine", Type: "Product"
- Text: "Microsoft Office suite" Name: "Microsoft Office", Type: "Software"
- Text: "John's startup company" Name: "John", Type: "Person" + Name: "Startup", Type: "Company"
**INCORRECT Examples:**
- Text: "Tesla car" Name: "Tesla car", Type: "Vehicle"
- Text: "authentication system" Name: "authentication system", Type: "System"
- Text: "payment service" Name: "payment service", Type: "Service"
Format your response as a JSON object with the following structure:
<output>
@ -167,7 +204,10 @@ Guidelines:
1. Extract significant entities, concepts, or actors mentioned in the content.
2. Avoid creating nodes for relationships or actions.
3. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
4. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
4. **CLEAN ENTITY NAMES**: Extract ONLY the core entity name WITHOUT type descriptors:
- "Tesla car" Name: "Tesla", Type: "Vehicle"
- Remove words like "app", "system", "platform", "tool", "service", "company" from entity names
5. Use full names when available and avoid abbreviations.
${context.customPrompt || ""}
`;
@ -186,7 +226,17 @@ export const dedupeNodes = (context: Record<string, any>): CoreMessage[] => {
{
role: "system",
content: `You are a helpful assistant who determines whether or not ENTITIES extracted from a conversation are duplicates of existing entities.
## CRITICAL RULE: Entity Type Matters
DO NOT mark entities with different types as duplicates, even if they have identical names.
- DO NOT mark "John" (Person) and "John" (Company) as duplicates
- DO NOT mark "Apple" (Company) and "Apple" (Fruit) as duplicates
- DO NOT mark "Core" (App) and "Core" (Concept) as duplicates
Consider entities as potential duplicates ONLY if they have:
1. Similar or identical names AND
2. The EXACT SAME entity type
Each entity in ENTITIES is represented as a JSON object with the following structure:
{
id: integer id of the entity,
@ -203,18 +253,55 @@ Each entity in ENTITIES is represented as a JSON object with the following struc
]
}
For each of the above ENTITIES, determine if the entity is a duplicate of any of its duplication candidates.
Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
Do NOT mark entities as duplicates if:
- They are related but distinct.
- They have similar names or purposes but refer to separate instances or concepts.
## Duplication Decision Rules
For each entity, determine if it is a duplicate of any of its duplication candidates:
Task:
Your response must be a JSON object with an "entity_resolutions" array containing one entry for each entity.
### MARK AS DUPLICATE (duplicate_idx >= 0) when:
- Verify the candidate has the SAME entity_type as the current entity
- AND confirm the entities refer to the same real-world object or concept
- AND check that the names are very similar or identical
### SPECIAL RULE FOR PREDICATES:
**ALWAYS mark identical predicates as duplicates** - predicates are universal and reusable:
- Mark "is associated with" (Predicate) vs "is associated with" (Predicate) duplicate_idx = 0
- Mark "works for" (Predicate) vs "works for" (Predicate) duplicate_idx = 0
- Mark "owns" (Predicate) vs "owns" (Predicate) duplicate_idx = 0
### DO NOT mark as duplicate (duplicate_idx = -1) when:
- Confirm the candidate has a DIFFERENT entity_type (even with identical names)
- Identify they are related but distinct entities
- Recognize they have similar names or purposes but refer to separate instances or concepts
- Distinguish when one is a general concept and the other is a specific instance
- **EXCEPTION**: DO NOT apply this rule to Predicates - always deduplicate identical predicates
## Examples:
**CORRECT - Mark as NOT Duplicates (Different Types):**
- Set "Tesla" (Company) vs "Tesla" (Car) duplicate_idx = -1
- Set "Apple" (Company) vs "Apple" (Fruit) duplicate_idx = -1
- Set "Core" (App) vs "Core" (System) duplicate_idx = -1
**CORRECT - Mark Predicates AS Duplicates (Same Name, Same Type):**
- Set "is associated with" (Predicate) vs "is associated with" (Predicate) duplicate_idx = 0
- Set "works for" (Predicate) vs "works for" (Predicate) duplicate_idx = 0
- Set "owns" (Predicate) vs "owns" (Predicate) duplicate_idx = 0
**CORRECT - Evaluate Potential Duplicates (Same Type):**
- Check if "John Smith" (Person) vs "John Smith" (Person) refer to same person
- Check if "Microsoft" (Company) vs "Microsoft Corporation" (Company) are the same company
- Check if "iPhone" (Product) vs "Apple iPhone" (Product) are the same product
**CORRECT - Mark as NOT Duplicates (Same Type, Different Instances):**
- Set "Meeting" (Event) vs "Meeting" (Event) duplicate_idx = -1 (different meetings)
- Set "Project" (Task) vs "Project" (Task) duplicate_idx = -1 (different projects)
- **NOTE**: DO NOT apply this rule to Predicates - always deduplicate identical predicates
## Task:
Provide your response as a JSON object with an "entity_resolutions" array containing one entry for each entity.
For each entity, include:
- "id": the id of the entity (integer)
- "name": the name of the entity (string)
- "name": the name of the entity (string)
- "duplicate_idx": the index of the duplicate candidate, or -1 if no duplicate (integer)
Format your response as follows:
@ -231,10 +318,12 @@ Format your response as follows:
}
</output>
Notes:
- If an entity is a duplicate of one of its duplication_candidates, set duplicate_idx to the idx of that candidate.
- If an entity is not a duplicate of any candidate, set duplicate_idx to -1.
- Always include all entities from the input in your response.
## Important Instructions:
- FIRST check if entity types match before considering any duplication
- If entity types don't match, immediately set duplicate_idx = -1
- Only mark entities with identical types as potential duplicates
- When in doubt, prefer NOT marking as duplicate (duplicate_idx = -1)
- Always include all entities from the input in your response
- Always wrap the output in these tags <output> </output>
`,
},

View File

@ -4,31 +4,66 @@ export const normalizePrompt = (
context: Record<string, any>,
): CoreMessage[] => {
const sysPrompt = `
You are C.O.R.E. (Contextual Observation & Recall Engine), a memory extraction system. Your task is to convert input information, such as user input, system events, or assistant actions, into clear, concise, third-person factual statements suitable for storage in a memory graph. These statements should be easily understandable and retrievable by any system or agent.
You are C.O.R.E. (Contextual Observation & Recall Engine), a memory extraction system. Convert input information into clear, concise, third-person factual statements that EVOLVE the memory graph by forming new relationships and capturing new information.
## Core Processing Philosophy
When related memories are provided, make memory graph evolution your PRIMARY GOAL, NOT information storage:
- **EVOLVE**: Focus on new information that adds relationships or updates existing knowledge
- **CONNECT**: Form explicit relationships between new and existing information
- **FILTER**: Aggressively exclude information already captured in related memories
- **ENHANCE**: Use existing knowledge to clarify new information and form connections
## Memory Processing Guidelines
- Always output memory statements in the third person (e.g., "User prefers...", "The assistant performed...", "The system detected...").
- Output all memory statements in the third person (e.g., "User prefers...", "The assistant performed...", "The system detected...").
- Convert input information into clear, concise memory statements.
- Maintain a neutral, factual tone in all memory entries.
- Structure memories as factual statements, not questions.
- Include relevant context and temporal information when available.
- When ingesting from assistant's perspective, ensure you still capture the complete user-assistant interaction context.
- When ingesting from assistant's perspective, capture the complete user-assistant interaction context.
## Complete Conversational Context
- IMPORTANT: Always preserve the complete context of conversations, including BOTH:
- IMPORTANT: Preserve the complete context of conversations, including BOTH:
- What the user said, asked, or requested
- How the assistant responded or what it suggested
- Any decisions, conclusions, or agreements reached
- Do not focus solely on the assistant's contributions while ignoring user context
- Capture the cause-and-effect relationship between user inputs and assistant responses
- For multi-turn conversations, preserve the logical flow and key points from each turn
- When the user provides information, always record that information directly, not just how the assistant used it
- When the user provides information, record that information directly, not just how the assistant used it
## Node Entity Types
${context.entityTypes}
## Related Memory Processing Strategy
When related memories are provided, apply this filtering and enhancement strategy:
### 1. INFORMATION FILTERING (What NOT to Include)
- **Already Captured Facts**: Do not repeat information already present in related memories unless it adds new context
- **Static Relationships**: Skip relationships already established (e.g., "John is co-founder" if already captured)
- **Redundant Details**: Exclude details that don't add new understanding or connections
- **Background Context**: Filter out explanatory information that's already in the memory graph
### 2. RELATIONSHIP FORMATION (What TO Include)
- **New Connections**: Include explicit relationships between entities mentioned in current and related episodes
- **Evolving Relationships**: Capture changes or updates to existing relationships
- **Cross-Context Links**: Form connections that bridge different contexts or time periods
- **Causal Relationships**: Extract how current information affects or is affected by existing knowledge
### 3. NEW INFORMATION EXTRACTION (Priority Focus)
- **Fresh Facts**: Extract information not present in any related memory
- **Updated Status**: Capture changes to previously captured information
- **New Attributes**: Add additional properties or characteristics of known entities
- **Temporal Updates**: Record time-based changes or progressions
- **Contextual Additions**: Include new contexts or situations involving known entities
### 4. MEMORY GRAPH EVOLUTION PATTERNS
- **Entity Enhancement**: Add new properties to existing entities without repeating known ones
- **Relationship Expansion**: Create new relationship types between known entities
- **Network Growth**: Connect previously isolated memory clusters
- **Knowledge Refinement**: Update or correct existing information with new insights
## Memory Selection Criteria
Evaluate conversations based on these priority categories:
Evaluate conversations using these priority categories:
### 1. High Priority (Always Remember)
- **User Preferences**: Explicit likes, dislikes, settings, or preferences
@ -97,59 +132,74 @@ Evaluate conversations based on these priority categories:
- **QA/Troubleshooting**: Conversations clearly intended for testing or debugging purposes
- **Internal Processing**: Comments about the assistant's own thinking process
## Related Knowledge Integration
- Consider these related episodes when processing new information:
## Enhanced Processing for Related Memories
When related memories are provided:
- Look for connections between new information and these existing memories
- Identify patterns, contradictions, or evolving preferences
- Reference related episodes when they provide important context
- Update or refine existing knowledge with new information
### Step 1: Analyze Existing Knowledge
- Identify all entities, relationships, and facts already captured
- Map the existing knowledge structure
- Note any gaps or areas for enhancement
## Memory Graph Integration
- Each memory will be converted to a node in the memory graph.
- Include relevant relationships between memory items when possible.
- Specify temporal aspects when memories are time-sensitive.
- Format memories to support efficient retrieval by any system or agent.
### Step 2: Extract Novel Information
- Filter current episode for information NOT in related memories
- Identify new entities, attributes, or relationships
- Focus on information that adds value to the memory graph
## Related Knowledge Integration
- Consider these related episodes and facts when processing new information:
- When related facts or episodes are provided, carefully analyze them for:
- **Connections**: Identify relationships between new information and existing memories
- **Patterns**: Recognize recurring themes, preferences, or behaviors
- **Contradictions**: Note when new information conflicts with existing knowledge
- **Evolution**: Track how user preferences or situations change over time
- **Context**: Use related memories to better understand the significance of new information
- Incorporate relevant context from related memories when appropriate
- Update or refine existing knowledge with new information
- When contradictions exist, note both the old and new information with timestamps
- Use related memories to determine the priority level of new information
- If related memories suggest a topic is important to the user, elevate its priority
### Step 3: Form Strategic Relationships
- Connect new entities to existing ones through explicit relationships
- Convert implicit connections into explicit memory statements
- Bridge knowledge gaps using new information
### Step 4: Evolve Existing Knowledge
- Update outdated information with new details
- Add new attributes to known entities
- Expand relationship networks with new connections
## Making Implicit Relationships Explicit
- **Entity Disambiguation**: When same names appear across contexts, use related memories to clarify relationships
- **Possessive Language**: Convert possessive forms into explicit relationships using related memory context
- **Cross-Reference Formation**: Create explicit links between entities that appear in multiple episodes
- **Temporal Relationship**: Establish time-based connections between related events or decisions
## Information Prioritization with Related Memories
- **HIGHEST PRIORITY**: New relationships between known entities
- **HIGH PRIORITY**: New attributes or properties of known entities
- **MEDIUM PRIORITY**: New entities with connections to existing knowledge
- **LOW PRIORITY**: Standalone new information without clear connections
- **EXCLUDE**: Information already captured in related memories that doesn't add new connections
## Output Format
When extracting memory-worthy information:
1. If nothing meets the criteria for storage, respond with exactly: "NOTHING_TO_REMEMBER"
1. If nothing meets the criteria for storage (especially after filtering against related memories), respond with exactly: "NOTHING_TO_REMEMBER"
2. Otherwise, provide a summary that:
- **Scales with conversation complexity**:
* For simple exchanges with 1-2 key points: Use 1-2 concise sentences
* For moderate complexity with 3-5 key points: Use 3-5 sentences, organizing related information
* For complex conversations with many important details: Use up to 8-10 sentences, structured by topic
- Focuses on facts rather than interpretations
- Uses the third person perspective
- Includes specific details (names, dates, numbers) when relevant
- Avoids unnecessary context or explanation
- Formats key information as attribute-value pairs when appropriate
- Uses bullet points for multiple distinct pieces of information
- **Prioritizes NEW information**: Focus on facts not present in related memories
- **Emphasizes relationships**: Highlight connections between new and existing information
- **Scales with novelty**: Make length reflect amount of genuinely new, valuable information
- **Uses third person perspective**: Maintain neutral, factual tone
- **Includes specific details**: Include names, dates, numbers when they add new value
- **Avoids redundancy**: Skip information already captured in related memories
- **Forms explicit connections**: Make relationships between entities clear and direct
## Examples of Complete Context Extraction
- INCOMPLETE: "Assistant suggested Italian restaurants in downtown."
- COMPLETE: "User asked for restaurant recommendations in downtown. Assistant suggested three Italian restaurants: Bella Vita, Romano's, and Trattoria Milano."
## Examples of Memory Graph Evolution
- INCOMPLETE: "Assistant provided information about Python functions."
- COMPLETE: "User asked how to define functions in Python. Assistant explained the syntax using 'def' keyword and provided an example of a function that calculates the factorial of a number."
### Before (Redundant Approach):
Related Memory: "John Smith is the co-founder of TechCorp."
Current Episode: "User discussed project timeline with John, the co-founder."
BAD Output: "User discussed project timeline with John Smith, who is the co-founder of TechCorp."
When processing new information for memory storage, focus on extracting the core facts, preferences, and events that will be most useful for future reference by any system or agent.
### After (Evolution Approach):
Related Memory: "John Smith is the co-founder of TechCorp."
Current Episode: "User discussed project timeline with John, the co-founder."
GOOD Output: "User discussed project timeline with John Smith. The project timeline discussion involved TechCorp's co-founder."
### Relationship Formation Example:
Related Memory: "User prefers morning meetings."
Current Episode: "User scheduled a meeting with John for 9 AM."
Output: "User scheduled a 9 AM meeting with John Smith, aligning with their preference for morning meetings."
Process information with related memories by focusing on evolving the memory graph through new connections and information rather than repeating already captured facts.
<output>
{{processed_statement}}

View File

@ -12,7 +12,7 @@ export const extractStatements = (
return [
{
role: "system",
content: `You are a knowledge graph expert who extracts factual statements from text as subject-predicate-object triples.
content: `You are a knowledge graph expert who extracts NEW factual statements from text as subject-predicate-object triples.
CRITICAL REQUIREMENT:
- You MUST ONLY use entities from the AVAILABLE ENTITIES list as subjects and objects.
@ -21,28 +21,60 @@ CRITICAL REQUIREMENT:
- DO NOT create, invent, or modify any entity names.
- NEVER create statements where the source and target are the same entity (no self-loops).
Your task is to identify important facts from the provided text and represent them in a knowledge graph format.
## PRIMARY MISSION: EXTRACT NEW RELATIONSHIPS
Focus on extracting factual statements that ADD NEW VALUE to the knowledge graph:
- **PRIORITIZE**: New relationships not already captured in previous episodes
- **EMPHASIZE**: Connections between entities with same names but different types
- **FILTER**: Avoid extracting facts already present in previous episodes
- **EVOLVE**: Form relationships that enhance the existing knowledge structure
Your task is to identify NEW important facts from the provided text and represent them in a knowledge graph format.
Follow these instructions:
1. First, carefully review the AVAILABLE ENTITIES list. These are the ONLY entities you can use as subjects and objects.
2. Identify factual statements that can be expressed using ONLY these available entities.
3. For each valid statement, provide:
1. **ANALYZE PREVIOUS EPISODES**: Review previous episodes to understand what relationships already exist
2. **REVIEW AVAILABLE ENTITIES**: Carefully examine the AVAILABLE ENTITIES list - these are the ONLY entities you can use as subjects and objects
3. **IDENTIFY SAME-NAME ENTITIES**: Look for entities with identical names but different types - these often represent natural relationships that should be explicitly connected
4. **EXTRACT NEW RELATIONSHIPS**: Identify factual statements that can be expressed using ONLY available entities AND are NOT already captured in previous episodes
5. For each NEW valid statement, provide:
- source: The subject entity (MUST be from AVAILABLE ENTITIES)
- predicate: The relationship type (can be a descriptive phrase)
- target: The object entity (MUST be from AVAILABLE ENTITIES)
EXTRACT ALL MEANINGFUL RELATIONSHIPS:
- Extract any meaningful relationship between available entities that's expressed in the text.
- Use predicates that accurately describe the relationship between entities.
- Be creative but precise in identifying relationships - don't miss important facts.
- Common examples include (but are not limited to):
* Ownership or association (e.g., "Taylor Swift" "performs at" "Taylor Swift's concert")
* Participation or attendance (e.g., "John" "attends" "Conference")
* Personal connections (e.g., "John" "is friend of" "Max")
* Aliases (e.g., "John" "is also known as" "John Smith")
* Locations (e.g., "Company" "headquartered in" "City")
* Characteristics (e.g., "Product" "has feature" "Feature")
EXTRACT NEW MEANINGFUL RELATIONSHIPS:
- Extract meaningful relationships between available entities that are NOT already captured in previous episodes
- Use predicates that accurately describe new relationships between entities
- Be creative but precise in identifying NEW relationships - focus on value-adding connections
- **HIGHEST PRIORITY**: Entities with identical names but different types MUST be connected with explicit relationship statements
- **MANDATORY**: When you find entities like "John (Person)" and "John (Company)", create explicit relationships such as "John" "owns" "John" or "John" "founded" "John"
- Look for both explicit and implicit NEW relationships mentioned in the text
- **FILTER OUT**: Relationships already established in previous episodes unless they represent updates or changes
- Common relationship types include (but are not limited to):
* Ownership or association (e.g., "Alice" "owns" "Restaurant")
* Participation or attendance (e.g., "Team" "participates in" "Tournament")
* Personal connections (e.g., "Sarah" "works with" "Michael")
* Aliases and alternative names (e.g., "Robert" "is also known as" "Bob")
* Locations and spatial relationships (e.g., "Office" "located in" "Building")
* Characteristics and properties (e.g., "System" "has property" "Scalability")
* Product-organization relationships (e.g., "Software" "developed by" "Company")
* Technical dependencies and usage (e.g., "Application" "uses" "Database")
* Hierarchical relationships (e.g., "Manager" "supervises" "Employee")
## SAME-NAME ENTITY RELATIONSHIP FORMATION
When entities share identical names but have different types, CREATE explicit relationship statements:
- **Person-Organization**: "John (Person)" "owns", "founded", "works for", or "leads" "John (Company)"
- **Person-Location**: "Smith (Person)" "lives in", "founded", or "is associated with" "Smith (City)"
- **Event-Location**: "Conference (Event)" "takes place at" or "is hosted by" "Conference (Venue)"
- **Product-Company**: "Tesla (Product)" "is manufactured by" or "is developed by" "Tesla (Company)"
- **MANDATORY**: Always create at least one relationship statement for same-name entities
- **CONTEXT-DRIVEN**: Choose predicates that accurately reflect the most likely relationship based on available context
## PREVIOUS EPISODE FILTERING
Before creating any relationship statement:
- **CHECK**: Review previous episodes to see if this exact relationship already exists
- **SKIP**: Do not create statements that duplicate existing relationships
- **ENHANCE**: Only create statements if they add new information or represent updates
- **FOCUS**: Prioritize completely new connections not represented in the knowledge graph
ABOUT TEMPORAL INFORMATION:
- For events with dates/times, DO NOT create a separate statement with the event as both source and target.
@ -56,8 +88,10 @@ Format your response as a JSON object with the following structure:
"edges": [
{
"source": "[Subject Entity Name - MUST be from AVAILABLE ENTITIES]",
"sourceType": "[Source Entity Type]",
"predicate": "[Relationship Type]",
"target": "[Object Entity Name - MUST be from AVAILABLE ENTITIES]",
"targetType": "[Target Entity Type]",
"fact": "[Natural language representation of the fact]",
"attributes": {
"confidence": confidence of the fact
@ -69,21 +103,25 @@ Format your response as a JSON object with the following structure:
</output>
IMPORTANT RULES:
- ONLY use entities from AVAILABLE ENTITIES as source and target.
- NEVER create statements where source or target is not in AVAILABLE ENTITIES.
- NEVER create statements where the source and target are the same entity (NO SELF-LOOPS).
- Instead of creating self-loops for temporal information, add timespan attributes to relevant statements.
- If you cannot express a fact using only available entities, omit it entirely.
- Always wrap output in tags <output> </output>.
- **ENTITIES**: ONLY use entities from AVAILABLE ENTITIES as source and target
- **NO INVENTION**: NEVER create statements where source or target is not in AVAILABLE ENTITIES
- **NO SELF-LOOPS**: NEVER create statements where the source and target are the same entity
- **SAME-NAME PRIORITY**: When entities share names but have different types, CREATE explicit relationship statements between them
- **NEW ONLY**: Do NOT create statements that duplicate relationships already present in previous episodes
- **TEMPORAL**: Instead of creating self-loops for temporal information, add timespan attributes to relevant statements
- **FILTER FIRST**: If you cannot express a NEW fact using only available entities, omit it entirely
- **OUTPUT FORMAT**: Always wrap output in tags <output> </output>
Example of CORRECT usage:
If AVAILABLE ENTITIES contains ["John", "Max", "Wedding"], you can create:
- "John" "attends" "Wedding"
- "Max" "married to" "Tina" with timespan attribute
If AVAILABLE ENTITIES contains ["John", "Max", "Wedding", "John (Company)"], you can create:
- "John" "attends" "Wedding" (if not already in previous episodes)
- "Max" "married to" "Tina" with timespan attribute (if new relationship)
- "John" "founded" "John (Company)" (PRIORITY: same name, different types)
Example of INCORRECT usage:
- "John" "attends" "Party" (if "Party" is not in AVAILABLE ENTITIES)
- "Marriage" "occurs on" "Marriage" (NEVER create self-loops)
- "John" "attends" "Wedding" (if already captured in previous episodes)
- "January 14" "is" "Marriage date" (if "January 14" or "Marriage date" is not in AVAILABLE ENTITIES)`,
},
{

View File

@ -447,3 +447,55 @@ export function getNodeTypesString(apps: Array<keyof typeof APP_NODE_TYPES>) {
export function getNodeAttributesString(
apps: Array<keyof typeof APP_NODE_TYPES>,
) {}
/**
* Check if a type is a preset type (from GENERAL_NODE_TYPES or APP_NODE_TYPES)
*/
export function isPresetType(
type: string,
apps: Array<keyof typeof APP_NODE_TYPES> = [],
): boolean {
// Check general types
const generalTypes = Object.keys(GENERAL_NODE_TYPES).map(
(key) => GENERAL_NODE_TYPES[key as keyof typeof GENERAL_NODE_TYPES].name,
);
if (generalTypes.includes(type as any)) {
return true;
}
// Check app-specific types
for (const app of apps) {
const appTypes = Object.keys(APP_NODE_TYPES[app] || {}).map(
(key) =>
APP_NODE_TYPES[app][key as keyof (typeof APP_NODE_TYPES)[typeof app]]
.name,
);
if (appTypes.includes(type as any)) {
return true;
}
}
return false;
}
/**
* Get all preset types for given apps
*/
export function getAllPresetTypes(
apps: Array<keyof typeof APP_NODE_TYPES> = [],
): string[] {
const generalTypes = Object.keys(GENERAL_NODE_TYPES).map(
(key) => GENERAL_NODE_TYPES[key as keyof typeof GENERAL_NODE_TYPES].name,
);
const appTypes = apps.flatMap((app) =>
Object.keys(APP_NODE_TYPES[app] || {}).map(
(key) =>
APP_NODE_TYPES[app][key as keyof (typeof APP_NODE_TYPES)[typeof app]]
.name,
),
);
return [...generalTypes, ...appTypes];
}
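A small usage sketch of the new preset-type helpers follows. The concrete type strings are illustrative; the real names come from GENERAL_NODE_TYPES and APP_NODE_TYPES, which are not part of this diff.

// Illustrative only - actual preset names depend on GENERAL_NODE_TYPES / APP_NODE_TYPES.
const allApps = Object.values(Apps);

const personIsPreset = isPresetType("Person", allApps);              // true if "Person" is a preset
const customIsPreset = isPresetType("memory_graph_system", allApps); // false for a custom, context-derived type

console.log(`All preset types: ${getAllPresetTypes(allApps).join(", ")}`);
console.log({ personIsPreset, customIsPreset });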

View File

@ -29,9 +29,10 @@ export interface EpisodicNode {
export interface EntityNode {
uuid: string;
name: string;
type: string;
type: string; // Single type - either from presets or custom
attributes: Record<string, any>;
nameEmbedding: number[];
typeEmbedding: number[];
createdAt: Date;
userId: string;
space?: string;
@ -82,3 +83,13 @@ export type AddEpisodeResult = {
statementsCreated: number;
processingTimeMs: number;
};
export interface ExtractedTripleData {
source: string;
sourceType: string;
predicate: string;
target: string;
targetType: string;
fact: string;
attributes?: Record<string, any>;
}
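For illustration, one parsed edge from the statement-extraction prompt would conform to this interface roughly as follows (all values invented):

// Hypothetical parsed triple matching the "edges" output format of the
// statement-extraction prompt; every value here is illustrative.
const exampleTriple: ExtractedTripleData = {
  source: "John",
  sourceType: "Person",
  predicate: "founded",
  target: "John",
  targetType: "Company",
  fact: "John founded the company that shares his name.",
  attributes: { confidence: 0.9 },
};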