mirror of
https://github.com/eliasstepanik/core.git
synced 2026-01-11 23:58:26 +00:00
333 lines
9.9 KiB
TypeScript
333 lines
9.9 KiB
TypeScript
/**
 * Prompts for extracting entity nodes from episodes
 */
|
|
|
|
import { type CoreMessage } from "ai";
|
|
|
|
/**
 * A single entity node as returned by the extraction prompts.
 */
export interface ExtractedEntity {
  /** Entity name; the prompts ask for explicit, unambiguous names. */
  name: string;
  /** Entity type label assigned by the model. */
  type: string;
  /** Optional free-form attributes attached to the entity. */
  attributes?: Record<string, any>;
}
|
|
|
|
/**
 * Top-level JSON object the extraction prompts ask the model to produce:
 * an `entities` array of extracted entity nodes.
 */
export interface ExtractedEntities {
  /** Every entity extracted from the episode or text. */
  entities: ExtractedEntity[];
}
|
|
|
|
/**
 * JSON object requested by the `reflexion` prompt: the entities that were
 * not picked up by a previous extraction pass.
 */
export interface MissedEntities {
  /** Entries the model reports as not yet extracted. */
  missedEntities: string[];
}
|
|
|
|
/**
 * Classification record for one entity: its identifier, its name and the
 * type assigned to it.
 */
export interface EntityClassificationTriple {
  /** Identifier of the classified entity. */
  uuid: string;
  /** Name of the classified entity. */
  name: string;
  /**
   * Assigned type, or `null`.
   * NOTE(review): presumably `null` means "no type could be assigned" — confirm against the consumer.
   */
  type: string | null;
}
|
|
|
|
/**
 * Wrapper object holding a list of entity classification records.
 * NOTE(review): no prompt in this file requests this shape; it is presumably
 * consumed elsewhere — confirm before changing.
 */
export interface EntityClassification {
  /** One classification record per entity. */
  entityClassifications: EntityClassificationTriple[];
}
|
|
|
|
/**
 * Build the chat messages that ask the model to extract entity nodes from a
 * conversational episode.
 *
 * @param context - Prompt inputs. Reads:
 *   - `previousEpisodes`: earlier episodes, serialized as JSON (defaults to `[]`);
 *   - `episodeContent`: the current episode, interpolated as-is;
 *   - `entityTypes` (optional): type descriptions, serialized as JSON into an
 *     `<ENTITY TYPES>` section when provided.
 * @returns A two-element array: the system message followed by the user message.
 */
export const extract_message = (
  context: Record<string, any>,
): CoreMessage[] => {
  const sysPrompt = `You are an AI assistant that extracts entity nodes from conversational messages for a reified knowledge graph.
Your primary task is to extract and classify significant entities mentioned in the conversation.

In a reified knowledge graph, we need to identify subject and object entities that will be connected through statements.
Focus on extracting:
1. Subject entities (people, objects, concepts)
2. Object entities (people, objects, concepts)

Instructions:

You are given a conversation context and a CURRENT EPISODE. Your task is to extract **entity nodes** mentioned **explicitly or implicitly** in the CURRENT EPISODE.

1. **Entity Identification**:
   - Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the CURRENT EPISODE.
   - **Exclude** entities mentioned only in the PREVIOUS EPISODES (they are for context only).

2. **Entity Classification**:
   - Use the descriptions in ENTITY TYPES to classify each extracted entity.
   - Assign the appropriate type for each one.

3. **Exclusions**:
   - Do NOT extract entities representing relationships or actions (predicates will be handled separately).
   - Do NOT extract dates, times, or other temporal information—these will be handled separately.

4. **Formatting**:
   - Be **explicit and unambiguous** in naming entities (e.g., use full names when available).


Format your response as a JSON object with the following structure:
<output>
{
  "entities": [
    {
      "name": "Entity Name",
      "type": "Entity Type"
    }
    // Additional entities...
  ]
}
</output>`;

  // The system prompt tells the model to classify using ENTITY TYPES, so the
  // section is supplied whenever the caller provides type descriptions. When
  // none are given the user message is unchanged.
  const entityTypesSection =
    context.entityTypes == null
      ? ""
      : `\n\n<ENTITY TYPES>\n${JSON.stringify(context.entityTypes, null, 2)}\n</ENTITY TYPES>`;

  const userPrompt = `
<PREVIOUS EPISODES>
${JSON.stringify(context.previousEpisodes || [], null, 2)}
</PREVIOUS EPISODES>

<CURRENT EPISODE>
${context.episodeContent}
</CURRENT EPISODE>${entityTypesSection}`;

  return [
    { role: "system", content: sysPrompt },
    { role: "user", content: userPrompt },
  ];
};
|
|
|
|
/**
 * Build the chat messages that ask the model to extract entity nodes from
 * free-form text.
 *
 * @param context - Prompt inputs. Reads:
 *   - `episodeContent`: the text to analyse, interpolated as-is inside `<TEXT>`;
 *   - `entityTypes` (optional): type descriptions, serialized as JSON into an
 *     `<ENTITY TYPES>` section when provided.
 * @returns A two-element array: the system message followed by the user message.
 */
export const extract_text = (context: Record<string, any>): CoreMessage[] => {
  const sysPrompt = `
You are an AI assistant that extracts entity nodes from text for a reified knowledge graph.
Your primary task is to extract and classify significant entities mentioned in the provided text.

In a reified knowledge graph, we need to identify subject and object entities that will be connected through statements.
Focus on extracting:
1. Subject entities (people, objects, concepts)
2. Object entities (people, objects, concepts)

Instructions:

You are given a TEXT. Your task is to extract **entity nodes** mentioned **explicitly or implicitly** in the TEXT.

1. **Entity Identification**:
   - Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the TEXT.

2. **Entity Classification**:
   - Use the descriptions in ENTITY TYPES to classify each extracted entity.
   - Assign the appropriate type for each one.

3. **Exclusions**:
   - Do NOT extract entities representing relationships or actions (predicates will be handled separately).
   - Do NOT extract dates, times, or other temporal information—these will be handled separately.

4. **Formatting**:
   - Be **explicit and unambiguous** in naming entities (e.g., use full names when available).


Format your response as a JSON object with the following structure:
<output>
{
  "entities": [
    {
      "name": "Entity Name",
      "type": "Entity Type"
    }
    // Additional entities...
  ]
}
</output>`;

  // The system prompt refers to ENTITY TYPES, so include them whenever the
  // caller supplies type descriptions. When none are given the user message
  // is unchanged.
  const entityTypesSection =
    context.entityTypes == null
      ? ""
      : `\n<ENTITY TYPES>\n${JSON.stringify(context.entityTypes, null, 2)}\n</ENTITY TYPES>\n`;

  const userPrompt = `
<TEXT>
${context.episodeContent}
</TEXT>
${entityTypesSection}`;

  return [
    { role: "system", content: sysPrompt },
    { role: "user", content: userPrompt },
  ];
};
|
|
|
|
/**
 * Build the chat messages for the compact, JSON-oriented entity extraction
 * prompt. All instructions live in the user message.
 *
 * @param context - Prompt inputs. Reads:
 *   - `previousEpisodes`: serialized as JSON (defaults to `[]`);
 *   - `episodeContent`: the current episode, interpolated as-is;
 *   - `entityTypes`: serialized as JSON (defaults to `{}`);
 *   - `customPrompt`: extra instructions appended at the end (defaults to `""`).
 * @returns A two-element array: the system message followed by the user message.
 */
export const extract_json = (context: Record<string, any>): CoreMessage[] => {
  const sysPrompt = `You are an AI assistant that extracts entity nodes from text.
Your primary task is to extract and classify significant entities mentioned in the content.`;

  const userPrompt = `
<PREVIOUS EPISODES>
${JSON.stringify(context.previousEpisodes || [], null, 2)}
</PREVIOUS EPISODES>

<CURRENT EPISODE>
${context.episodeContent}
</CURRENT EPISODE>

<ENTITY TYPES>
${JSON.stringify(context.entityTypes || {}, null, 2)}
</ENTITY TYPES>

Instructions:

Extract all significant entities mentioned in the CURRENT EPISODE. For each entity, provide a name and type.
Respond with a JSON object containing an "entities" array of objects, each with "name" and "type" properties.

Guidelines:
1. Extract significant entities, concepts, or actors mentioned in the content.
2. Avoid creating nodes for relationships or actions.
3. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
4. Be as explicit as possible in your node names, using full names and avoiding abbreviations.

${context.customPrompt || ""}
`;

  return [
    { role: "system", content: sysPrompt },
    { role: "user", content: userPrompt },
  ];
};
|
|
|
|
/**
 * Build the chat messages that ask the model which entities an earlier
 * extraction pass failed to capture.
 *
 * @param context - Prompt inputs. Reads:
 *   - `previousEpisodes`: serialized as JSON (defaults to `[]`);
 *   - `episodeContent`: the current episode, interpolated as-is;
 *   - `extractedEntities`: entities already extracted, serialized as JSON
 *     (defaults to `[]`).
 * @returns A two-element array: the system message followed by the user message.
 */
export const reflexion = (context: Record<string, any>): CoreMessage[] => {
  const sysPrompt = `You are an AI assistant that determines which entities have not been extracted from the given context`;

  const userPrompt = `
<PREVIOUS EPISODES>
${JSON.stringify(context.previousEpisodes || [], null, 2)}
</PREVIOUS EPISODES>

<CURRENT EPISODE>
${context.episodeContent}
</CURRENT EPISODE>

<EXTRACTED ENTITIES>
${JSON.stringify(context.extractedEntities || [], null, 2)}
</EXTRACTED ENTITIES>

Given the above previous episodes, current episode, and list of extracted entities; determine if any entities haven't been
extracted. Respond with a JSON object containing a "missedEntities" array of strings.
`;

  return [
    { role: "system", content: sysPrompt },
    { role: "user", content: userPrompt },
  ];
};
|
|
|
|
/**
 * Build the chat messages that ask the model to update an entity's
 * attributes from the episodes.
 *
 * @param context - Prompt inputs. Reads:
 *   - `previousEpisodes`: serialized as JSON (defaults to `[]`);
 *   - `episodeContent`: serialized as JSON directly after the previous episodes;
 *   - `node`: the entity to update, serialized as JSON inside `<ENTITY>`.
 * @returns A two-element array: the system message followed by the user message.
 */
export const extract_attributes = (
  context: Record<string, any>,
): CoreMessage[] => {
  const sysPrompt =
    "You are a helpful assistant that extracts entity properties from the provided text.";

  const userPrompt = `
<EPISODES>
${JSON.stringify(context.previousEpisodes || [], null, 2)}
${JSON.stringify(context.episodeContent, null, 2)}
</EPISODES>

Given the above EPISODES and the following ENTITY, update any of its attributes based on the information provided
in EPISODES. Use the provided attribute descriptions to better understand how each attribute should be determined.

Guidelines:
1. Do not hallucinate entity property values if they cannot be found in the current context.
2. Only use the provided EPISODES and ENTITY to set attribute values.
3. The summary attribute represents a summary of the ENTITY, and should be updated with new information about the Entity from the EPISODES.
   Summaries must be no longer than 250 words.

<ENTITY>
${JSON.stringify(context.node, null, 2)}
</ENTITY>
`;

  return [
    { role: "system", content: sysPrompt },
    { role: "user", content: userPrompt },
  ];
};
|
|
|
|
/**
 * Build the chat messages that ask the model to decide, for each extracted
 * entity, whether it duplicates one of its candidate entities.
 *
 * @param context - Prompt inputs. Reads:
 *   - `previousEpisodes`: serialized as JSON (defaults to `[]`);
 *   - `episodeContent`: the current episode, interpolated as-is;
 *   - `extracted_nodes`: the entities to resolve, serialized as JSON
 *     (defaults to `[]` so a missing value is not rendered as `undefined`).
 * @returns A two-element array: the system message followed by the user message.
 */
export const dedupeNodes = (context: Record<string, any>): CoreMessage[] => {
  const sysPrompt = `You are a helpful assistant who determines whether or not ENTITIES extracted from a conversation are duplicates of existing entities.

Each entity in ENTITIES is represented as a JSON object with the following structure:
{
  id: integer id of the entity,
  name: "name of the entity",
  entity_type: "ontological classification of the entity",
  entity_type_description: "Description of what the entity type represents",
  duplication_candidates: [
    {
      idx: integer index of the candidate entity,
      name: "name of the candidate entity",
      entity_type: "ontological classification of the candidate entity",
      ...<additional attributes>
    }
  ]
}

For each of the above ENTITIES, determine if the entity is a duplicate of any of its duplication candidates.
Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
Do NOT mark entities as duplicates if:
- They are related but distinct.
- They have similar names or purposes but refer to separate instances or concepts.

Task:
Your response must be a JSON object with an "entity_resolutions" array containing one entry for each entity.

For each entity, include:
- "id": the id of the entity (integer)
- "name": the name of the entity (string)
- "duplicate_idx": the index of the duplicate candidate, or -1 if no duplicate (integer)

Format your response as follows:
<output>
{
  "entity_resolutions": [
    {
      "id": 0,
      "name": "Entity Name",
      "duplicate_idx": -1
    },
    // Additional entity resolutions...
  ]
}
</output>

Notes:
- If an entity is a duplicate of one of its duplication_candidates, set duplicate_idx to the idx of that candidate.
- If an entity is not a duplicate of any candidate, set duplicate_idx to -1.
- Always include all entities from the input in your response.
- Always wrap the output in these tags <output> </output>
`;

  const userPrompt = `
<PREVIOUS EPISODES>
${JSON.stringify(context.previousEpisodes || [], null, 2)}
</PREVIOUS EPISODES>

<CURRENT EPISODE>
${context.episodeContent}
</CURRENT EPISODE>

<ENTITIES>
${JSON.stringify(context.extracted_nodes || [], null, 2)}
</ENTITIES>
`;

  return [
    { role: "system", content: sysPrompt },
    { role: "user", content: userPrompt },
  ];
};
|