mirror of
https://github.com/eliasstepanik/core.git
synced 2026-01-10 23:48:26 +00:00
refactor: make entity handling type-free and simplify entity resolution in knowledge graph
This commit is contained in:
parent
6ddcab873a
commit
952386ca0e
@ -2,19 +2,9 @@ import type { EntityNode } from "@core/types";
|
||||
import { runQuery } from "~/lib/neo4j.server";
|
||||
|
||||
export async function saveEntity(entity: EntityNode): Promise<string> {
|
||||
// Debug: Log entity to identify missing typeEmbedding
|
||||
if (!entity.typeEmbedding) {
|
||||
console.error(`Entity missing typeEmbedding:`, {
|
||||
uuid: entity.uuid,
|
||||
name: entity.name,
|
||||
type: entity.type,
|
||||
hasNameEmbedding: !!entity.nameEmbedding,
|
||||
});
|
||||
throw new Error(
|
||||
`Entity ${entity.name} (${entity.type}) is missing typeEmbedding`,
|
||||
);
|
||||
}
|
||||
|
||||
// Build query conditionally based on whether typeEmbedding exists
|
||||
const hasTypeEmbedding = entity.typeEmbedding && entity.typeEmbedding.length > 0;
|
||||
|
||||
const query = `
|
||||
MERGE (n:Entity {uuid: $uuid})
|
||||
ON CREATE SET
|
||||
@ -22,7 +12,7 @@ export async function saveEntity(entity: EntityNode): Promise<string> {
|
||||
n.type = $type,
|
||||
n.attributes = $attributes,
|
||||
n.nameEmbedding = $nameEmbedding,
|
||||
n.typeEmbedding = $typeEmbedding,
|
||||
${hasTypeEmbedding ? 'n.typeEmbedding = $typeEmbedding,' : ''}
|
||||
n.createdAt = $createdAt,
|
||||
n.userId = $userId,
|
||||
n.space = $space
|
||||
@ -31,23 +21,27 @@ export async function saveEntity(entity: EntityNode): Promise<string> {
|
||||
n.type = $type,
|
||||
n.attributes = $attributes,
|
||||
n.nameEmbedding = $nameEmbedding,
|
||||
n.typeEmbedding = $typeEmbedding,
|
||||
${hasTypeEmbedding ? 'n.typeEmbedding = $typeEmbedding,' : ''}
|
||||
n.space = $space
|
||||
RETURN n.uuid as uuid
|
||||
`;
|
||||
|
||||
const params = {
|
||||
const params: any = {
|
||||
uuid: entity.uuid,
|
||||
name: entity.name,
|
||||
type: entity.type,
|
||||
type: entity.type || "",
|
||||
attributes: JSON.stringify(entity.attributes || {}),
|
||||
nameEmbedding: entity.nameEmbedding,
|
||||
typeEmbedding: entity.typeEmbedding,
|
||||
createdAt: entity.createdAt.toISOString(),
|
||||
userId: entity.userId,
|
||||
space: entity.space || null,
|
||||
};
|
||||
|
||||
// Add typeEmbedding to params only if it exists
|
||||
if (hasTypeEmbedding) {
|
||||
params.typeEmbedding = entity.typeEmbedding;
|
||||
}
|
||||
|
||||
const result = await runQuery(query, params);
|
||||
return result[0].get("uuid");
|
||||
}
|
||||
@ -65,10 +59,10 @@ export async function getEntity(uuid: string): Promise<EntityNode | null> {
|
||||
return {
|
||||
uuid: entity.uuid,
|
||||
name: entity.name,
|
||||
type: entity.type,
|
||||
type: entity.type || null,
|
||||
attributes: JSON.parse(entity.attributes || "{}"),
|
||||
nameEmbedding: entity.nameEmbedding,
|
||||
typeEmbedding: entity.typeEmbedding,
|
||||
typeEmbedding: entity.typeEmbedding || null,
|
||||
createdAt: new Date(entity.createdAt),
|
||||
userId: entity.userId,
|
||||
space: entity.space,
|
||||
|
||||
@ -15,8 +15,7 @@ import crypto from "crypto";
|
||||
import {
|
||||
dedupeNodes,
|
||||
extractAttributes,
|
||||
extractMessage,
|
||||
extractText,
|
||||
extractEntities,
|
||||
} from "./prompts/nodes";
|
||||
import {
|
||||
extractStatements,
|
||||
@ -25,14 +24,11 @@ import {
|
||||
import {
|
||||
getEpisodeStatements,
|
||||
getRecentEpisodes,
|
||||
getRelatedEpisodesEntities,
|
||||
searchEpisodesByEmbedding,
|
||||
} from "./graphModels/episode";
|
||||
import {
|
||||
findExactPredicateMatches,
|
||||
findSimilarEntities,
|
||||
findSimilarEntitiesWithSameType,
|
||||
replaceEntityReferences,
|
||||
} from "./graphModels/entity";
|
||||
import {
|
||||
findContradictoryStatements,
|
||||
@ -47,9 +43,7 @@ import { getEmbedding, makeModelCall } from "~/lib/model.server";
|
||||
import { runQuery } from "~/lib/neo4j.server";
|
||||
import {
|
||||
Apps,
|
||||
getNodeTypes,
|
||||
getNodeTypesString,
|
||||
isPresetType,
|
||||
} from "~/utils/presets/nodes";
|
||||
import { normalizePrompt, normalizeDocumentPrompt } from "./prompts";
|
||||
import { type PrismaClient } from "@prisma/client";
|
||||
@ -272,8 +266,8 @@ export class KnowledgeGraphService {
|
||||
params.type,
|
||||
);
|
||||
|
||||
const normalizedTime = Date.now() - startTime;
|
||||
logger.log(`Normalized episode body in ${normalizedTime} ms`);
|
||||
const normalizedTime = Date.now();
|
||||
logger.log(`Normalized episode body in ${normalizedTime - startTime} ms`);
|
||||
|
||||
if (normalizedEpisodeBody === "NOTHING_TO_REMEMBER") {
|
||||
logger.log("Nothing to remember");
|
||||
@ -284,15 +278,6 @@ export class KnowledgeGraphService {
|
||||
};
|
||||
}
|
||||
|
||||
const relatedEpisodesEntities = await getRelatedEpisodesEntities({
|
||||
embedding: await this.getEmbedding(normalizedEpisodeBody),
|
||||
userId: params.userId,
|
||||
minSimilarity: 0.7,
|
||||
});
|
||||
|
||||
const relatedTime = Date.now() - normalizedTime;
|
||||
logger.log(`Related episodes entities in ${relatedTime} ms`);
|
||||
|
||||
// Step 2: Episode Creation - Create or retrieve the episode
|
||||
const episode: EpisodicNode = {
|
||||
uuid: crypto.randomUUID(),
|
||||
@ -316,23 +301,18 @@ export class KnowledgeGraphService {
|
||||
);
|
||||
|
||||
const extractedTime = Date.now();
|
||||
logger.log(`Extracted entities in ${extractedTime - relatedTime} ms`);
|
||||
logger.log(`Extracted entities in ${extractedTime - normalizedTime} ms`);
|
||||
|
||||
// Step 3.1: Context-aware entity resolution with preset type evolution
|
||||
await this.resolveEntitiesWithContext(
|
||||
extractedNodes,
|
||||
relatedEpisodesEntities,
|
||||
);
|
||||
|
||||
// Step 3.2: Handle preset type logic - expand entities for statement extraction
|
||||
const categorizedEntities = await this.expandEntitiesForStatements(
|
||||
extractedNodes,
|
||||
episode,
|
||||
);
|
||||
// Step 3.1: Simple entity categorization (no type-based expansion needed)
|
||||
const categorizedEntities = {
|
||||
primary: extractedNodes,
|
||||
expanded: [], // No expansion needed with type-free approach
|
||||
};
|
||||
|
||||
const expandedTime = Date.now();
|
||||
logger.log(`Expanded entities in ${expandedTime - extractedTime} ms`);
|
||||
logger.log(`Processed entities in ${expandedTime - extractedTime} ms`);
|
||||
|
||||
console.log(extractedNodes.map((e) => e.name));
|
||||
// Step 4: Statement Extrraction - Extract statements (triples) instead of direct edges
|
||||
const extractedStatements = await this.extractStatements(
|
||||
episode,
|
||||
@ -371,10 +351,12 @@ export class KnowledgeGraphService {
|
||||
);
|
||||
|
||||
// Step 7: ADd attributes to entity nodes
|
||||
const updatedTriples = await this.addAttributesToEntities(
|
||||
resolvedStatements,
|
||||
episode,
|
||||
);
|
||||
// const updatedTriples = await this.addAttributesToEntities(
|
||||
// resolvedStatements,
|
||||
// episode,
|
||||
// );
|
||||
|
||||
const updatedTriples = resolvedStatements;
|
||||
|
||||
const updatedTriplesTime = Date.now();
|
||||
logger.log(
|
||||
@ -439,12 +421,6 @@ export class KnowledgeGraphService {
|
||||
episode: EpisodicNode,
|
||||
previousEpisodes: EpisodicNode[],
|
||||
): Promise<EntityNode[]> {
|
||||
// Get all app keys
|
||||
const allAppEnumValues = Object.values(Apps);
|
||||
|
||||
// Get all node types
|
||||
const entityTypes = getNodeTypes(allAppEnumValues);
|
||||
|
||||
// Use the prompt library to get the appropriate prompts
|
||||
const context = {
|
||||
episodeContent: episode.content,
|
||||
@ -452,13 +428,11 @@ export class KnowledgeGraphService {
|
||||
content: ep.content,
|
||||
createdAt: ep.createdAt.toISOString(),
|
||||
})),
|
||||
entityTypes: entityTypes,
|
||||
};
|
||||
|
||||
// Get the extract_json prompt from the prompt library
|
||||
const messages = episode.sessionId
|
||||
? extractMessage(context)
|
||||
: extractText(context);
|
||||
// Get the unified entity extraction prompt
|
||||
const extractionMode = episode.sessionId ? 'conversation' : 'document';
|
||||
const messages = extractEntities(context, extractionMode);
|
||||
|
||||
let responseText = "";
|
||||
|
||||
@ -474,21 +448,19 @@ export class KnowledgeGraphService {
|
||||
responseText = outputMatch[1].trim();
|
||||
const extractedEntities = JSON.parse(responseText || "{}").entities || [];
|
||||
|
||||
// Batch generate embeddings for better performance
|
||||
// Batch generate embeddings for entity names
|
||||
const entityNames = extractedEntities.map((entity: any) => entity.name);
|
||||
const entityTypes = extractedEntities.map((entity: any) => entity.type);
|
||||
const [nameEmbeddings, typeEmbeddings] = await Promise.all([
|
||||
Promise.all(entityNames.map((name: string) => this.getEmbedding(name))),
|
||||
Promise.all(entityTypes.map((type: string) => this.getEmbedding(type))),
|
||||
]);
|
||||
const nameEmbeddings = await Promise.all(
|
||||
entityNames.map((name: string) => this.getEmbedding(name))
|
||||
);
|
||||
|
||||
entities = extractedEntities.map((entity: any, index: number) => ({
|
||||
uuid: crypto.randomUUID(),
|
||||
name: entity.name,
|
||||
type: entity.type,
|
||||
type: undefined, // Type will be inferred from statements
|
||||
attributes: entity.attributes || {},
|
||||
nameEmbedding: nameEmbeddings[index],
|
||||
typeEmbedding: typeEmbeddings[index],
|
||||
typeEmbedding: undefined, // No type embedding needed
|
||||
createdAt: new Date(),
|
||||
userId: episode.userId,
|
||||
}));
|
||||
@ -537,6 +509,8 @@ export class KnowledgeGraphService {
|
||||
responseText = text;
|
||||
});
|
||||
|
||||
console.log(responseText);
|
||||
|
||||
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
|
||||
if (outputMatch && outputMatch[1]) {
|
||||
responseText = outputMatch[1].trim();
|
||||
@ -548,6 +522,8 @@ export class KnowledgeGraphService {
|
||||
const extractedTriples: ExtractedTripleData[] =
|
||||
JSON.parse(responseText || "{}").edges || [];
|
||||
|
||||
console.log(`extracted triples length: ${extractedTriples.length}`)
|
||||
|
||||
// Create maps to deduplicate entities by name within this extraction
|
||||
const predicateMap = new Map<string, EntityNode>();
|
||||
|
||||
@ -597,17 +573,13 @@ export class KnowledgeGraphService {
|
||||
// Convert extracted triples to Triple objects with Statement nodes
|
||||
const triples = extractedTriples.map(
|
||||
(triple: ExtractedTripleData, tripleIndex: number) => {
|
||||
// Find the subject and object nodes by matching both name and type
|
||||
// Find the subject and object nodes by matching name (type-free approach)
|
||||
const subjectNode = allEntities.find(
|
||||
(node) =>
|
||||
node.name.toLowerCase() === triple.source.toLowerCase() &&
|
||||
node.type.toLowerCase() === triple.sourceType.toLowerCase(),
|
||||
(node) => node.name.toLowerCase() === triple.source.toLowerCase()
|
||||
);
|
||||
|
||||
const objectNode = allEntities.find(
|
||||
(node) =>
|
||||
node.name.toLowerCase() === triple.target.toLowerCase() &&
|
||||
node.type.toLowerCase() === triple.targetType.toLowerCase(),
|
||||
(node) => node.name.toLowerCase() === triple.target.toLowerCase()
|
||||
);
|
||||
|
||||
// Get the deduplicated predicate node
|
||||
@ -661,108 +633,7 @@ export class KnowledgeGraphService {
|
||||
return triples.filter(Boolean) as Triple[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Expand entities for statement extraction by adding existing preset entities
|
||||
*/
|
||||
private async expandEntitiesForStatements(
|
||||
extractedNodes: EntityNode[],
|
||||
episode: EpisodicNode,
|
||||
): Promise<{
|
||||
primary: EntityNode[];
|
||||
expanded: EntityNode[];
|
||||
}> {
|
||||
const allAppEnumValues = Object.values(Apps);
|
||||
const expandedEntities: EntityNode[] = [];
|
||||
|
||||
// For each extracted entity, check if we need to add existing preset entities
|
||||
for (const entity of extractedNodes) {
|
||||
const newIsPreset = isPresetType(entity.type, allAppEnumValues);
|
||||
|
||||
// Find similar entities with same name
|
||||
const similarEntities = await findSimilarEntities({
|
||||
queryEmbedding: entity.nameEmbedding,
|
||||
limit: 5,
|
||||
threshold: 0.8,
|
||||
userId: episode.userId,
|
||||
});
|
||||
|
||||
for (const existingEntity of similarEntities) {
|
||||
const existingIsPreset = isPresetType(
|
||||
existingEntity.type,
|
||||
allAppEnumValues,
|
||||
);
|
||||
|
||||
// If both are preset types, include both for statement extraction
|
||||
if (newIsPreset && existingIsPreset) {
|
||||
// Add the existing entity to the list if not already present
|
||||
if (!expandedEntities.some((e) => e.uuid === existingEntity.uuid)) {
|
||||
expandedEntities.push(existingEntity);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Deduplicate by name AND type combination
|
||||
const deduplicateEntities = (entities: EntityNode[]) => {
|
||||
const seen = new Map<string, EntityNode>();
|
||||
return entities.filter((entity) => {
|
||||
const key = `${entity.name.toLowerCase()}_${entity.type.toLowerCase()}`;
|
||||
if (seen.has(key)) {
|
||||
return false;
|
||||
}
|
||||
seen.set(key, entity);
|
||||
return true;
|
||||
});
|
||||
};
|
||||
|
||||
return {
|
||||
primary: deduplicateEntities(extractedNodes),
|
||||
expanded: deduplicateEntities(
|
||||
expandedEntities.filter(
|
||||
(e) => !extractedNodes.some((primary) => primary.uuid === e.uuid),
|
||||
),
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve entities with context-aware deduplication and preset type evolution
|
||||
* Only merges entities that appear in semantically related episodes
|
||||
*/
|
||||
private async resolveEntitiesWithContext(
|
||||
extractedNodes: EntityNode[],
|
||||
relatedEpisodesEntities: EntityNode[],
|
||||
): Promise<void> {
|
||||
const allAppEnumValues = Object.values(Apps);
|
||||
|
||||
extractedNodes.map(async (newEntity) => {
|
||||
// Find same-name entities in related episodes (contextually relevant)
|
||||
const sameNameInContext = relatedEpisodesEntities.filter(
|
||||
(existing) =>
|
||||
existing.name.toLowerCase() === newEntity.name.toLowerCase(),
|
||||
);
|
||||
|
||||
if (sameNameInContext.length > 0) {
|
||||
let existingEntityIds: string[] = [];
|
||||
sameNameInContext.forEach(async (existingEntity) => {
|
||||
const newIsPreset = isPresetType(newEntity.type, allAppEnumValues);
|
||||
const existingIsPreset = isPresetType(
|
||||
existingEntity.type,
|
||||
allAppEnumValues,
|
||||
);
|
||||
|
||||
if (newIsPreset && !existingIsPreset) {
|
||||
// New is preset, existing is custom - evolve existing entity to preset type
|
||||
existingEntityIds.push(existingEntity.uuid);
|
||||
}
|
||||
});
|
||||
|
||||
if (existingEntityIds.length > 0) {
|
||||
await replaceEntityReferences(newEntity, existingEntityIds);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve extracted nodes to existing nodes or create new ones
|
||||
@ -835,9 +706,8 @@ export class KnowledgeGraphService {
|
||||
// Step 2a: Find similar entities for non-predicate entities
|
||||
const similarEntitiesResults = await Promise.all(
|
||||
nonPredicates.map(async (entity) => {
|
||||
const similarEntities = await findSimilarEntitiesWithSameType({
|
||||
const similarEntities = await findSimilarEntities({
|
||||
queryEmbedding: entity.nameEmbedding,
|
||||
entityType: entity.type,
|
||||
limit: 5,
|
||||
threshold: 0.7,
|
||||
userId: episode.userId,
|
||||
@ -1240,20 +1110,12 @@ export class KnowledgeGraphService {
|
||||
return triples; // No entities to process
|
||||
}
|
||||
|
||||
// Get all app keys
|
||||
const allAppEnumValues = Object.values(Apps);
|
||||
|
||||
// Get all node types with their attribute definitions
|
||||
const entityTypes = getNodeTypes(allAppEnumValues);
|
||||
|
||||
// Prepare simplified context for the LLM
|
||||
const context = {
|
||||
episodeContent: episode.content,
|
||||
entityTypes: entityTypes,
|
||||
entities: entities.map((entity) => ({
|
||||
uuid: entity.uuid,
|
||||
name: entity.name,
|
||||
type: entity.type,
|
||||
currentAttributes: entity.attributes || {},
|
||||
})),
|
||||
};
|
||||
|
||||
@ -5,16 +5,20 @@
|
||||
import { type CoreMessage } from "ai";
|
||||
|
||||
/**
|
||||
* Extract entities from an episode using message-based approach
|
||||
* Extract entities from content using unified approach (works for both conversations and documents)
|
||||
*/
|
||||
export const extractMessage = (context: Record<string, any>): CoreMessage[] => {
|
||||
export const extractEntities = (
|
||||
context: Record<string, any>,
|
||||
extractionMode: 'conversation' | 'document' = 'conversation'
|
||||
): CoreMessage[] => {
|
||||
const sysPrompt = `You are an AI assistant that extracts entity nodes from conversational messages for a reified knowledge graph.
|
||||
Your primary task is to extract and classify significant entities mentioned in the conversation.
|
||||
Your primary task is to extract all significant entities mentioned in the conversation, treating both concrete entities and type/concept entities as first-class nodes.
|
||||
|
||||
In a reified knowledge graph, we need to identify subject and object entities that will be connected through statements.
|
||||
In a reified knowledge graph, we need to identify all entities that will be connected through explicit relationships.
|
||||
Focus on extracting:
|
||||
1. Subject entities (people, objects, concepts)
|
||||
2. Object entities (people, objects, concepts)
|
||||
1. Concrete entities (people, objects, specific instances)
|
||||
2. Type/concept entities (categories, classes, abstract concepts)
|
||||
3. All entities that participate in "X is a Y" relationships
|
||||
|
||||
Instructions:
|
||||
|
||||
@ -22,16 +26,16 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
|
||||
|
||||
1. **Entity Identification**:
|
||||
- Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the CURRENT EPISODE.
|
||||
- For identity statements like "I am X" or "I'm X", extract BOTH the pronoun ("I") as a Alias entity AND the named entity (X).
|
||||
- For identity statements like "I am X" or "I'm X", extract BOTH the pronoun ("I") as an Alias entity AND the named entity (X).
|
||||
- **ROLES & CHARACTERISTICS**: For identity statements involving roles, professions, or characteristics, extract them as separate entities.
|
||||
- For pronouns that refer to named entities, extract them as separate Alias entities.
|
||||
- **TYPE/CONCEPT ENTITIES**: When text contains "X is a Y" statements, extract BOTH X and Y as separate entities.
|
||||
|
||||
2. **Entity Classification**:
|
||||
- Prefer using appropriate types from the ENTITY_TYPES section when they fit naturally.
|
||||
- DO NOT force-fit entities into inappropriate types from ENTITY_TYPES.
|
||||
- If no type from ENTITY_TYPES fits naturally, create a descriptive type based on context (e.g., "memory_graph_system", "authentication_bug").
|
||||
- Each entity should have exactly ONE type that best describes what it is.
|
||||
- Classify pronouns (I, me, you, etc.) as "Alias" entities.
|
||||
2. **Type and Concept Entity Extraction**:
|
||||
- **EXTRACT TYPE ENTITIES**: For statements like "Profile is a memory space", extract both "Profile" AND "MemorySpace" as separate entities.
|
||||
- **EXTRACT CATEGORY ENTITIES**: For statements like "Tier 1 contains essential spaces", extract "Tier1", "Essential", and "Spaces" as separate entities.
|
||||
- **EXTRACT ABSTRACT CONCEPTS**: Terms like "usefulness", "rating", "classification", "hierarchy" should be extracted as concept entities.
|
||||
- **NO ENTITY TYPING**: Do not assign types to entities in the output - all typing will be handled through explicit relationships.
|
||||
|
||||
3. **Exclusions**:
|
||||
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
|
||||
@ -40,13 +44,13 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
|
||||
- Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm").
|
||||
|
||||
4. **Entity Name Extraction**:
|
||||
- Extract ONLY the core entity name, WITHOUT any type descriptors or qualifiers
|
||||
- When text mentions "Tesla car", extract name as "Tesla" with type "Vehicle"
|
||||
- When text mentions "John's company", extract name as "John" with type "Person" (company is a separate entity)
|
||||
- **CLEAN NAMES**: Remove type words like "app", "system", "platform", "tool", "service", "company", "organization" from the entity name
|
||||
- **PRONOUNS**: Use exact form as they appear (e.g., "I", "me", "you") and classify as "Alias"
|
||||
- Extract ONLY the core entity name, WITHOUT any descriptors or qualifiers
|
||||
- When text mentions "Tesla car", extract TWO entities: "Tesla" AND "Car"
|
||||
- When text mentions "memory space system", extract "Memory", "Space", AND "System" as separate entities
|
||||
- **CLEAN NAMES**: Remove articles (a, an, the) and quantifiers, but preserve the core concept
|
||||
- **PRONOUNS**: Use exact form as they appear (e.g., "I", "me", "you")
|
||||
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
|
||||
- **NO TYPE SUFFIXES**: Never append the entity type to the entity name
|
||||
- **CONCEPT NORMALIZATION**: Convert to singular form where appropriate ("spaces" → "Space")
|
||||
|
||||
5. **Temporal and Relationship Context Extraction**:
|
||||
- EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years")
|
||||
@ -57,6 +61,19 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
|
||||
|
||||
## Examples of Correct Entity Extraction:
|
||||
|
||||
**TYPE/CONCEPT ENTITY EXTRACTION:**
|
||||
|
||||
✅ **EXTRACT BOTH ENTITIES IN "IS A" RELATIONSHIPS:**
|
||||
- Text: "Profile is a memory space" → Extract: "Profile" AND "MemorySpace"
|
||||
- Text: "Tesla is a car" → Extract: "Tesla" AND "Car"
|
||||
- Text: "John is a teacher" → Extract: "John" AND "Teacher"
|
||||
- Text: "Goals space connects to Projects" → Extract: "Goals", "Space", AND "Projects"
|
||||
|
||||
✅ **EXTRACT CONCEPT ENTITIES:**
|
||||
- Text: "rated 10/10 for usefulness" → Extract: "Usefulness", "Rating"
|
||||
- Text: "essential classification tier" → Extract: "Essential", "Classification", "Tier"
|
||||
- Text: "hierarchical memory system" → Extract: "Hierarchical", "Memory", "System"
|
||||
|
||||
**TEMPORAL INFORMATION - What to EXTRACT vs EXCLUDE:**
|
||||
|
||||
✅ **EXTRACT - Relationship Temporal Information:**
|
||||
@ -73,47 +90,50 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
|
||||
- Text: "next week" → Don't extract "next week"
|
||||
|
||||
**RELATIONSHIP CONTEXT ENTITIES:**
|
||||
- Text: "my close friends" → Extract: "close friends" (QualifiedGroup)
|
||||
- Text: "strong support system" → Extract: "support system" (RelationshipType)
|
||||
- Text: "work colleagues" → Extract: "work colleagues" (ProfessionalGroup)
|
||||
- Text: "family members" → Extract: "family members" (FamilyGroup)
|
||||
- Text: "my close friends" → Extract: "Close Friends" (QualifiedGroup)
|
||||
- Text: "strong support system" → Extract: "Support System" (RelationshipType)
|
||||
- Text: "work colleagues" → Extract: "Work Colleagues" (ProfessionalGroup)
|
||||
- Text: "family members" → Extract: "Family Members" (FamilyGroup)
|
||||
|
||||
**STANDARD ENTITY EXTRACTION:**
|
||||
- Text: "Tesla car" → Name: "Tesla", Type: "Vehicle"
|
||||
- Text: "Google's search engine" → Name: "Google", Type: "Company" + Name: "Search Engine", Type: "Product"
|
||||
- Text: "Microsoft Office suite" → Name: "Microsoft Office", Type: "Software"
|
||||
- Text: "John's startup company" → Name: "John", Type: "Person" + Name: "Startup", Type: "Company"
|
||||
- Text: "Tesla car" → Extract: "Tesla" AND "Car"
|
||||
- Text: "Google's search engine" → Extract: "Google" AND "Search Engine"
|
||||
- Text: "Microsoft Office suite" → Extract: "Microsoft Office" AND "Suite"
|
||||
- Text: "John's startup company" → Extract: "John", "Startup", AND "Company"
|
||||
|
||||
**INCORRECT Examples:**
|
||||
- Text: "Tesla car" → ❌ Name: "Tesla car", Type: "Vehicle"
|
||||
- Text: "authentication system" → ❌ Name: "authentication system", Type: "System"
|
||||
- Text: "payment service" → ❌ Name: "payment service", Type: "Service"
|
||||
**CORRECT vs INCORRECT Examples:**
|
||||
|
||||
✅ **CORRECT:**
|
||||
- Text: "Profile is a memory space" → Extract: "Profile", "MemorySpace"
|
||||
- Text: "essential classification system" → Extract: "Essential", "Classification", "System"
|
||||
- Text: "10/10 usefulness rating" → Extract: "Usefulness", "Rating"
|
||||
|
||||
❌ **INCORRECT:**
|
||||
- Text: "Profile is a memory space" → ❌ Only extract: "Profile"
|
||||
- Text: "authentication system" → ❌ Extract: "authentication system" (should be "Authentication", "System")
|
||||
- Text: "payment service" → ❌ Extract: "payment service" (should be "Payment", "Service")
|
||||
|
||||
Format your response as a JSON object with the following structure:
|
||||
<output>
|
||||
{
|
||||
"entities": [
|
||||
{
|
||||
"name": "Entity Name",
|
||||
"type": "Entity Type",
|
||||
"name": "Entity Name"
|
||||
}
|
||||
// Additional entities...
|
||||
]
|
||||
}
|
||||
</output>`;
|
||||
|
||||
const contentLabel = extractionMode === 'conversation' ? 'CURRENT EPISODE' : 'TEXT';
|
||||
const userPrompt = `
|
||||
<PREVIOUS EPISODES>
|
||||
${extractionMode === 'conversation' ? `<PREVIOUS EPISODES>
|
||||
${JSON.stringify(context.previousEpisodes || [], null, 2)}
|
||||
</PREVIOUS EPISODES>
|
||||
|
||||
<CURRENT EPISODE>
|
||||
` : ''}<${contentLabel}>
|
||||
${context.episodeContent}
|
||||
</CURRENT EPISODE>
|
||||
|
||||
<ENTITY_TYPES>
|
||||
${JSON.stringify(context.entityTypes || {}, null, 2)}
|
||||
</ENTITY_TYPES>
|
||||
</${contentLabel}>
|
||||
|
||||
`;
|
||||
|
||||
@ -123,161 +143,6 @@ ${JSON.stringify(context.entityTypes || {}, null, 2)}
|
||||
];
|
||||
};
|
||||
|
||||
/**
|
||||
* Extract entities from text-based content
|
||||
*/
|
||||
export const extractText = (context: Record<string, any>): CoreMessage[] => {
|
||||
const sysPrompt = `
|
||||
You are an AI assistant that extracts entity nodes from text for a reified knowledge graph.
|
||||
Your primary task is to extract and classify significant entities mentioned in the provided text.
|
||||
|
||||
In a reified knowledge graph, we need to identify subject and object entities that will be connected through statements.
|
||||
Focus on extracting:
|
||||
1. Subject entities
|
||||
2. Object entities
|
||||
|
||||
Instructions:
|
||||
|
||||
You are given a TEXT. Your task is to extract **entity nodes** mentioned **explicitly or implicitly** in the TEXT.
|
||||
|
||||
1. **Entity Identification**:
|
||||
- Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the TEXT.
|
||||
- For identity statements like "I am X" or "I'm X", extract BOTH the pronoun ("I") as a Alias entity AND the named entity (X).
|
||||
- **ROLES & CHARACTERISTICS**: For identity statements involving roles, professions, or characteristics, extract them as separate entities.
|
||||
- For pronouns that refer to named entities, extract them as separate Alias entities.
|
||||
|
||||
2. **Entity Classification**:
|
||||
- Prefer using appropriate types from the ENTITY_TYPES section when they fit naturally.
|
||||
- DO NOT force-fit entities into inappropriate types from ENTITY_TYPES.
|
||||
- If no type from ENTITY_TYPES fits naturally, create a descriptive type based on context.
|
||||
- Each entity should have exactly ONE type that best describes what it is.
|
||||
- Classify pronouns (I, me, you, etc.) as "Alias" entities.
|
||||
|
||||
3. **Exclusions**:
|
||||
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
|
||||
- **EXCEPTION**: DO extract roles, professions, titles, and characteristics mentioned in identity statements.
|
||||
- Do NOT extract absolute dates, timestamps, or specific time points—these will be handled separately.
|
||||
- Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm").
|
||||
|
||||
4. **Entity Name Extraction**:
|
||||
- Extract ONLY the core entity name, WITHOUT any type descriptors or qualifiers
|
||||
- When text mentions "Tesla car", extract name as "Tesla" with type "Vehicle"
|
||||
- When text mentions "John's company", extract name as "John" with type "Person" (company is a separate entity)
|
||||
- **CLEAN NAMES**: Remove type words like "app", "system", "platform", "tool", "service", "company", "organization" from the entity name
|
||||
- **PRONOUNS**: Use exact form as they appear (e.g., "I", "me", "you") and classify as "Alias"
|
||||
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
|
||||
- **NO TYPE SUFFIXES**: Never append the entity type to the entity name
|
||||
|
||||
5. **Temporal and Relationship Context Extraction**:
|
||||
- EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years")
|
||||
- EXTRACT temporal context that anchors relationships ("since moving", "after graduation", "during college")
|
||||
- EXTRACT relationship qualifiers ("close friends", "support system", "work team", "family members")
|
||||
- DO NOT extract absolute dates, timestamps, or specific time points ("June 9, 2023", "3pm", "last Saturday")
|
||||
- DO NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday")
|
||||
|
||||
## Examples of Correct Entity Extraction:
|
||||
|
||||
**TEMPORAL INFORMATION - What to EXTRACT vs EXCLUDE:**
|
||||
|
||||
✅ **EXTRACT - Relationship Temporal Information:**
|
||||
- Text: "I've known these friends for 4 years" → Extract: "4 years" (Duration)
|
||||
- Text: "since I moved from my home country" → Extract: "since moving" (TemporalContext)
|
||||
- Text: "after that tough breakup" → Extract: "after breakup" (TemporalContext)
|
||||
- Text: "we've been married for 5 years" → Extract: "5 years" (Duration)
|
||||
- Text: "during college" → Extract: "during college" (TemporalContext)
|
||||
|
||||
❌ **EXCLUDE - Absolute Dates/Times:**
|
||||
- Text: "on June 9, 2023" → Don't extract "June 9, 2023"
|
||||
- Text: "last Saturday" → Don't extract "last Saturday"
|
||||
- Text: "at 3pm yesterday" → Don't extract "3pm" or "yesterday"
|
||||
- Text: "next week" → Don't extract "next week"
|
||||
|
||||
**RELATIONSHIP CONTEXT ENTITIES:**
|
||||
- Text: "my close friends" → Extract: "close friends" (QualifiedGroup)
|
||||
- Text: "strong support system" → Extract: "support system" (RelationshipType)
|
||||
- Text: "work colleagues" → Extract: "work colleagues" (ProfessionalGroup)
|
||||
- Text: "family members" → Extract: "family members" (FamilyGroup)
|
||||
|
||||
**STANDARD ENTITY EXTRACTION:**
|
||||
- Text: "Tesla car" → Name: "Tesla", Type: "Vehicle"
|
||||
- Text: "Google's search engine" → Name: "Google", Type: "Company" + Name: "Search Engine", Type: "Product"
|
||||
- Text: "Microsoft Office suite" → Name: "Microsoft Office", Type: "Software"
|
||||
- Text: "John's startup company" → Name: "John", Type: "Person" + Name: "Startup", Type: "Company"
|
||||
|
||||
**INCORRECT Examples:**
|
||||
- Text: "Tesla car" → ❌ Name: "Tesla car", Type: "Vehicle"
|
||||
- Text: "authentication system" → ❌ Name: "authentication system", Type: "System"
|
||||
- Text: "payment service" → ❌ Name: "payment service", Type: "Service"
|
||||
|
||||
Format your response as a JSON object with the following structure:
|
||||
<output>
|
||||
{
|
||||
"entities": [
|
||||
{
|
||||
"name": "Entity Name",
|
||||
"type": "Entity Type"
|
||||
}
|
||||
// Additional entities...
|
||||
]
|
||||
}
|
||||
</output>`;
|
||||
const userPrompt = `
|
||||
<TEXT>
|
||||
${context.episodeContent}
|
||||
</TEXT>
|
||||
|
||||
<ENTITY_TYPES>
|
||||
${JSON.stringify(context.entityTypes || {}, null, 2)}
|
||||
</ENTITY_TYPES>
|
||||
`;
|
||||
|
||||
return [
|
||||
{ role: "system", content: sysPrompt },
|
||||
{ role: "user", content: userPrompt },
|
||||
];
|
||||
};
|
||||
/**
|
||||
* Extract entities from an episode using JSON-based approach
|
||||
*/
|
||||
export const extractJson = (context: Record<string, any>): CoreMessage[] => {
|
||||
const sysPrompt = `You are an AI assistant that extracts entity nodes from text.
|
||||
Your primary task is to extract and classify significant entities mentioned in the content.`;
|
||||
|
||||
const userPrompt = `
|
||||
<PREVIOUS EPISODES>
|
||||
${JSON.stringify(context.previousEpisodes || [], null, 2)}
|
||||
</PREVIOUS EPISODES>
|
||||
|
||||
<CURRENT EPISODE>
|
||||
${context.episodeContent}
|
||||
</CURRENT EPISODE>
|
||||
|
||||
<ENTITY TYPES>
|
||||
${JSON.stringify(context.entityTypes || {}, null, 2)}
|
||||
</ENTITY TYPES>
|
||||
|
||||
Instructions:
|
||||
|
||||
Extract all significant entities mentioned in the CURRENT EPISODE. For each entity, provide a name and type.
|
||||
Respond with a JSON object containing an "entities" array of objects, each with "name" and "type" properties.
|
||||
|
||||
Guidelines:
|
||||
1. Extract significant entities, concepts, or actors mentioned in the content.
|
||||
2. Avoid creating nodes for relationships or actions.
|
||||
3. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
|
||||
4. **CLEAN ENTITY NAMES**: Extract ONLY the core entity name WITHOUT type descriptors:
|
||||
- "Tesla car" → Name: "Tesla", Type: "Vehicle"
|
||||
- Remove words like "app", "system", "platform", "tool", "service", "company" from entity names
|
||||
5. Use full names when available and avoid abbreviations.
|
||||
|
||||
${context.customPrompt || ""}
|
||||
`;
|
||||
|
||||
return [
|
||||
{ role: "system", content: sysPrompt },
|
||||
{ role: "user", content: userPrompt },
|
||||
];
|
||||
};
|
||||
|
||||
/**
|
||||
* Resolve entity duplications
|
||||
@ -286,84 +151,53 @@ export const dedupeNodes = (context: Record<string, any>): CoreMessage[] => {
|
||||
return [
|
||||
{
|
||||
role: "system",
|
||||
content: `You are a helpful assistant who determines whether or not ENTITIES extracted from a conversation are duplicates of existing entities.
|
||||
content: `You are a helpful assistant who determines whether extracted entities are duplicates of existing entities.
|
||||
|
||||
## CRITICAL RULE: Entity Type Matters
|
||||
DO NOT mark entities with different types as duplicates, even if they have identical names.
|
||||
- DO NOT mark "John" (Person) and "John" (Company) as duplicates
|
||||
- DO NOT mark "Apple" (Company) and "Apple" (Fruit) as duplicates
|
||||
- DO NOT mark "Core" (App) and "Core" (Concept) as duplicates
|
||||
|
||||
Consider entities as potential duplicates ONLY if they have:
|
||||
1. Similar or identical names AND
|
||||
2. The EXACT SAME entity type
|
||||
Focus on name-based similarity and contextual meaning to identify duplicates.
|
||||
|
||||
Each entity in ENTITIES is represented as a JSON object with the following structure:
|
||||
{
|
||||
id: integer id of the entity,
|
||||
name: "name of the entity",
|
||||
entity_type: "ontological classification of the entity",
|
||||
entity_type_description: "Description of what the entity type represents",
|
||||
duplication_candidates: [
|
||||
{
|
||||
idx: integer index of the candidate entity,
|
||||
name: "name of the candidate entity",
|
||||
entity_type: "ontological classification of the candidate entity",
|
||||
...<additional attributes>
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
## Duplication Decision Rules
|
||||
For each entity, determine if it is a duplicate of any of its duplication candidates:
|
||||
## Duplication Decision Framework
|
||||
|
||||
### MARK AS DUPLICATE (duplicate_idx >= 0) when:
|
||||
- Verify the candidate has the SAME entity_type as the current entity
|
||||
- AND confirm the entities refer to the same real-world object or concept
|
||||
- AND check that the names are very similar or identical
|
||||
|
||||
### SPECIAL RULE FOR PREDICATES:
|
||||
**ALWAYS mark identical predicates as duplicates** - predicates are universal and reusable:
|
||||
- Mark "is associated with" (Predicate) vs "is associated with" (Predicate) → duplicate_idx = 0 ✓
|
||||
- Mark "works for" (Predicate) vs "works for" (Predicate) → duplicate_idx = 0 ✓
|
||||
- Mark "owns" (Predicate) vs "owns" (Predicate) → duplicate_idx = 0 ✓
|
||||
- **IDENTICAL NAMES**: Exact same name or obvious synonyms
|
||||
- **SEMANTIC EQUIVALENCE**: Different names but clearly referring to the same entity
|
||||
- **STRUCTURAL VARIATIONS**: Same entity with minor formatting differences
|
||||
|
||||
### DO NOT mark as duplicate (duplicate_idx = -1) when:
|
||||
- Confirm the candidate has a DIFFERENT entity_type (even with identical names)
|
||||
- Identify they are related but distinct entities
|
||||
- Recognize they have similar names or purposes but refer to separate instances or concepts
|
||||
- Distinguish when one is a general concept and the other is a specific instance
|
||||
- **EXCEPTION**: DO NOT apply this rule to Predicates - always deduplicate identical predicates
|
||||
- **DIFFERENT INSTANCES**: Similar names but different real-world entities
|
||||
- **CONTEXTUAL DISTINCTION**: Same name but different contexts suggest distinct entities
|
||||
- **HIERARCHICAL RELATIONSHIPS**: One is part of/contains the other
|
||||
|
||||
## Examples:
|
||||
## Example Patterns:
|
||||
|
||||
**CORRECT - Mark as NOT Duplicates (Different Types):**
|
||||
- Set "Tesla" (Company) vs "Tesla" (Car) → duplicate_idx = -1
|
||||
- Set "Apple" (Company) vs "Apple" (Fruit) → duplicate_idx = -1
|
||||
- Set "Core" (App) vs "Core" (System) → duplicate_idx = -1
|
||||
**DUPLICATE CASES:**
|
||||
- "John Smith" vs "John Smith" → Check context for same person
|
||||
- "Microsoft" vs "Microsoft Corporation" → Same organization (duplicate_idx = 0)
|
||||
- "iPhone" vs "Apple iPhone" → Same product (duplicate_idx = 0)
|
||||
- "Tier 1" vs "Tier 1" → Same classification level (duplicate_idx = 0)
|
||||
|
||||
**CORRECT - Mark Predicates AS Duplicates (Same Name, Same Type):**
|
||||
- Set "is associated with" (Predicate) vs "is associated with" (Predicate) → duplicate_idx = 0
|
||||
- Set "works for" (Predicate) vs "works for" (Predicate) → duplicate_idx = 0
|
||||
- Set "owns" (Predicate) vs "owns" (Predicate) → duplicate_idx = 0
|
||||
**NOT DUPLICATE CASES:**
|
||||
- "Meeting Room A" vs "Meeting Room B" → Different rooms (duplicate_idx = -1)
|
||||
- "Project Alpha" vs "Project Beta" → Different projects (duplicate_idx = -1)
|
||||
- "Essential" vs "Critical" → Different priority levels (duplicate_idx = -1)
|
||||
- "Team Lead" vs "Team Member" → Different roles (duplicate_idx = -1)
|
||||
|
||||
**CORRECT - Evaluate Potential Duplicates (Same Type):**
|
||||
- Check if "John Smith" (Person) vs "John Smith" (Person) refer to same person
|
||||
- Check if "Microsoft" (Company) vs "Microsoft Corporation" (Company) are the same company
|
||||
- Check if "iPhone" (Product) vs "Apple iPhone" (Product) are the same product
|
||||
|
||||
**CORRECT - Mark as NOT Duplicates (Same Type, Different Instances):**
|
||||
- Set "Meeting" (Event) vs "Meeting" (Event) → duplicate_idx = -1 (different meetings)
|
||||
- Set "Project" (Task) vs "Project" (Task) → duplicate_idx = -1 (different projects)
|
||||
- **NOTE**: DO NOT apply this rule to Predicates - always deduplicate identical predicates
|
||||
|
||||
## Task:
|
||||
Provide your response as a JSON object with an "entity_resolutions" array containing one entry for each entity.
|
||||
|
||||
For each entity, include:
|
||||
- "id": the id of the entity (integer)
|
||||
- "name": the name of the entity (string)
|
||||
- "duplicate_idx": the index of the duplicate candidate, or -1 if no duplicate (integer)
|
||||
## Decision Guidelines:
|
||||
- **CONSERVATIVE APPROACH**: When uncertain, prefer NOT marking as duplicate
|
||||
- **CONTEXT MATTERS**: Consider the episode content and previous episodes
|
||||
- **SEMANTIC MEANING**: Focus on whether they refer to the same real-world entity
|
||||
|
||||
Format your response as follows:
|
||||
<output>
|
||||
@ -380,12 +214,9 @@ Format your response as follows:
|
||||
</output>
|
||||
|
||||
## Important Instructions:
|
||||
- FIRST check if entity types match before considering any duplication
|
||||
- If entity types don't match, immediately set duplicate_idx = -1
|
||||
- Only mark entities with identical types as potential duplicates
|
||||
- When in doubt, prefer NOT marking as duplicate (duplicate_idx = -1)
|
||||
- Always include all entities from the input in your response
- Always wrap the output in these tags <output> </output>
|
||||
`,
|
||||
},
|
||||
{
|
||||
@ -412,16 +243,21 @@ export const extractAttributes = (
|
||||
): CoreMessage[] => {
|
||||
const sysPrompt = `
|
||||
You are an AI assistant that extracts and enhances entity attributes based on context.
|
||||
Your task is to analyze entities and provide appropriate attribute values for each entity based on its type definition.
|
||||
Your task is to analyze entities and provide appropriate attribute values based on available information.
|
||||
|
||||
For each entity:
|
||||
1. Look at its type and identify the required and optional attributes from the entity type definitions
|
||||
2. Check if the entity already has values for these attributes
|
||||
3. For missing attributes, extract appropriate values from the context if possible
|
||||
4. For existing attributes, enhance or correct them if needed based on the context
|
||||
5. Give empty attributes object ({}) when there are no attributes to update
|
||||
6. Only include attributes that you're updating - don't repeat existing attributes that don't need changes
|
||||
7. I'll merge your new attributes with the current attributes, so only provide values that should be added or modified
|
||||
1. Analyze the context to identify relevant attributes for the entity
|
||||
2. Extract appropriate values from the episode content if available
|
||||
3. Focus on factual, descriptive attributes rather than type classifications
|
||||
4. Give empty attributes object ({}) when there are no attributes to update
|
||||
5. Only include attributes that you're adding or modifying
|
||||
6. I'll merge your new attributes with existing ones, so only provide updates
|
||||
|
||||
Common attribute types to consider:
|
||||
- Descriptive properties (color, size, status, etc.)
|
||||
- Relational context (role, position, relationship, etc.)
|
||||
- Temporal information (duration, frequency, etc.)
|
||||
- Qualitative aspects (importance, preference, etc.)
|
||||
|
||||
Provide your output in this structure:
|
||||
<output>
|
||||
@ -441,10 +277,6 @@ Provide your output in this structure:
|
||||
</output>`;
|
||||
|
||||
const userPrompt = `
|
||||
<ENTITY_TYPES>
|
||||
${JSON.stringify(context.entityTypes, null, 2)}
|
||||
</ENTITY_TYPES>
|
||||
|
||||
<ENTITIES>
|
||||
${JSON.stringify(context.entities, null, 2)}
|
||||
</ENTITIES>
|
||||
@ -453,7 +285,7 @@ ${JSON.stringify(context.entities, null, 2)}
|
||||
${context.episodeContent}
|
||||
</EPISODE_CONTENT>
|
||||
|
||||
Based on the above information, please extract and enhance attributes for each entity according to its type definition. Return only the uuid and updated attributes for each entity.`;
|
||||
Based on the above information, please extract and enhance attributes for each entity based on the context. Return only the uuid and updated attributes for each entity.`;
|
||||
return [
|
||||
{ role: "system", content: sysPrompt },
|
||||
{ role: "user", content: userPrompt },
|
||||
|
||||
@ -132,10 +132,8 @@ Format your response as a JSON object with the following structure:
|
||||
"edges": [
|
||||
{
|
||||
"source": "[Subject Entity Name - MUST be from AVAILABLE ENTITIES]",
|
||||
"sourceType": "[Source Entity Type]",
|
||||
"predicate": "[Relationship Type]",
|
||||
"target": "[Object Entity Name - MUST be from AVAILABLE ENTITIES]",
|
||||
"targetType": "[Target Entity Type]",
|
||||
"fact": "[Natural language representation of the fact]",
|
||||
"attributes": {
|
||||
"confidence": confidence of the fact,
|
||||
|
||||
@ -48,10 +48,10 @@ export interface EpisodicNode {
|
||||
export interface EntityNode {
|
||||
uuid: string;
|
||||
name: string;
|
||||
type: string; // Single type - either from presets or custom
|
||||
type?: string; // Optional type - can be inferred from statements
|
||||
attributes: Record<string, any>;
|
||||
nameEmbedding: number[];
|
||||
typeEmbedding: number[];
|
||||
typeEmbedding?: number[]; // Optional since type is optional
|
||||
createdAt: Date;
|
||||
userId: string;
|
||||
space?: string;
|
||||
@ -123,10 +123,10 @@ export type AddEpisodeResult = {
|
||||
|
||||
export interface ExtractedTripleData {
|
||||
source: string;
|
||||
sourceType: string;
|
||||
sourceType?: string; // Optional - can be inferred from statements
|
||||
predicate: string;
|
||||
target: string;
|
||||
targetType: string;
|
||||
targetType?: string; // Optional - can be inferred from statements
|
||||
fact: string;
|
||||
attributes?: Record<string, any>;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user