mirror of
https://github.com/eliasstepanik/core.git
synced 2026-01-10 23:48:26 +00:00
refactor: implement statement extraction and resolution
This commit is contained in:
parent
0853a30897
commit
96d829642b
@ -10,7 +10,8 @@ import {
|
||||
import { LLMMappings, LLMModelEnum } from "@recall/types";
|
||||
import { logger } from "./logger.service";
|
||||
import crypto from "crypto";
|
||||
import { extract_message, extract_text } from "./prompts/extractNodes";
|
||||
import { dedupeNodes, extract_message, extract_text } from "./prompts/nodes";
|
||||
import { extract_statements } from "./prompts/statements";
|
||||
|
||||
export enum EpisodeType {
|
||||
Conversation = "CONVERSATION",
|
||||
@ -41,7 +42,7 @@ export interface EpisodicNode {
|
||||
* Entities represent subjects, objects, or predicates in statements
|
||||
*/
|
||||
export interface EntityNode {
|
||||
uuid?: string;
|
||||
uuid: string;
|
||||
name: string;
|
||||
type: string;
|
||||
attributes: Record<string, any>;
|
||||
@ -211,8 +212,8 @@ export class KnowledgeGraphService {
|
||||
|
||||
// Step 5: Statement Extraction - Extract statements (triples) instead of direct edges
|
||||
const extractedStatements = await this.extractStatements(
|
||||
resolvedNodes,
|
||||
episode,
|
||||
resolvedNodes,
|
||||
previousEpisodes,
|
||||
);
|
||||
|
||||
@ -292,21 +293,27 @@ export class KnowledgeGraphService {
|
||||
},
|
||||
);
|
||||
|
||||
const extractedEntities = JSON.parse(responseText || "{}").entities || [];
|
||||
|
||||
// Convert to EntityNode objects
|
||||
const entities: EntityNode[] = [];
|
||||
|
||||
for (const entity of extractedEntities) {
|
||||
entities.push({
|
||||
uuid: crypto.randomUUID(),
|
||||
name: entity.name,
|
||||
type: entity.type,
|
||||
attributes: entity.attributes || {},
|
||||
nameEmbedding: [], // Will be populated later
|
||||
createdAt: new Date(),
|
||||
userId: episode.userId,
|
||||
});
|
||||
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
|
||||
if (outputMatch && outputMatch[1]) {
|
||||
responseText = outputMatch[1].trim();
|
||||
const extractedEntities = JSON.parse(responseText || "{}").entities || [];
|
||||
|
||||
entities.push(
|
||||
...(await Promise.all(
|
||||
extractedEntities.map(async (entity: any) => ({
|
||||
uuid: crypto.randomUUID(),
|
||||
name: entity.name,
|
||||
type: entity.type,
|
||||
attributes: entity.attributes || {},
|
||||
nameEmbedding: await this.getEmbedding(entity.name),
|
||||
createdAt: new Date(),
|
||||
userId: episode.userId,
|
||||
})),
|
||||
)),
|
||||
);
|
||||
}
|
||||
|
||||
return entities;
|
||||
@ -321,55 +328,102 @@ export class KnowledgeGraphService {
|
||||
previousEpisodes: EpisodicNode[],
|
||||
): Promise<{ resolvedNodes: EntityNode[]; uuidMap: Map<string, string> }> {
|
||||
const uuidMap = new Map<string, string>();
|
||||
const resolvedNodes: EntityNode[] = [];
|
||||
|
||||
for (const extractedNode of extractedNodes) {
|
||||
// Generate embedding for the node name
|
||||
const nameEmbedding = await this.getEmbedding(extractedNode.name);
|
||||
const existingNodesLists = await Promise.all(
|
||||
extractedNodes.map(async (extractedNode) => {
|
||||
// Check if a similar node already exists in HelixDB
|
||||
// Use vector similarity search to find similar entities
|
||||
// Threshold is 0.85 - meaning at least 85% similarity (lower cosine distance)
|
||||
const similarEntities = await helixClient.query("findSimilarEntities", {
|
||||
queryEmbedding: extractedNode.nameEmbedding,
|
||||
limit: 5, // Get top 5 matches
|
||||
threshold: 0.85, // 85% similarity threshold
|
||||
});
|
||||
|
||||
// Check if a similar node already exists in HelixDB
|
||||
// Use vector similarity search to find similar entities
|
||||
// Threshold is 0.85 - meaning at least 85% similarity (lower cosine distance)
|
||||
const similarEntities = await helixClient.query("findSimilarEntities", {
|
||||
queryEmbedding: nameEmbedding,
|
||||
limit: 5, // Get top 5 matches
|
||||
threshold: 0.85, // 85% similarity threshold
|
||||
return similarEntities.nodes;
|
||||
}),
|
||||
);
|
||||
|
||||
if (!existingNodesLists || existingNodesLists.length === 0) {
|
||||
extractedNodes.forEach((node) => {
|
||||
uuidMap.set(node.uuid, node.uuid);
|
||||
});
|
||||
|
||||
const existingNodes = similarEntities.nodes;
|
||||
|
||||
// Get entity types dictionary or empty object if not provided
|
||||
const entityTypesDict = entity_types || {};
|
||||
|
||||
if (similarEntities.length > 0) {
|
||||
// If similar nodes exist, we need to decide if we want to merge with an existing one
|
||||
// This could involve LLM to determine if they're the same entity
|
||||
const existingNode = similarEntities[0];
|
||||
|
||||
// Map the extracted node UUID to the existing node UUID
|
||||
uuidMap.set(extractedNode.uuid, existingNode.uuid);
|
||||
|
||||
// Add the existing node to our resolved nodes if not already present
|
||||
if (!resolvedNodes.some((node) => node.uuid === existingNode.uuid)) {
|
||||
resolvedNodes.push({
|
||||
uuid: existingNode.uuid,
|
||||
name: existingNode.name,
|
||||
type: existingNode.type,
|
||||
attributes: existingNode.attributes || {},
|
||||
nameEmbedding: existingNode.nameEmbedding,
|
||||
createdAt: new Date(existingNode.createdAt),
|
||||
userId: existingNode.userId,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// This is a new entity, add embedding and keep as is
|
||||
extractedNode.nameEmbedding = nameEmbedding;
|
||||
resolvedNodes.push(extractedNode);
|
||||
uuidMap.set(extractedNode.uuid, extractedNode.uuid);
|
||||
}
|
||||
return { resolvedNodes: extractedNodes, uuidMap };
|
||||
}
|
||||
|
||||
return { resolvedNodes, uuidMap };
|
||||
// Prepare context for LLM
|
||||
const extractedNodesContext = extractedNodes.map(
|
||||
(node: EntityNode, i: number) => {
|
||||
return {
|
||||
id: i,
|
||||
name: node.name,
|
||||
entity_type: node.type,
|
||||
entity_type_description: "Default Entity Type",
|
||||
duplication_candidates: existingNodesLists[i].map(
|
||||
(candidate: EntityNode, j: number) => ({
|
||||
idx: j,
|
||||
name: candidate.name,
|
||||
entity_types: candidate.type,
|
||||
...candidate.attributes,
|
||||
}),
|
||||
),
|
||||
};
|
||||
},
|
||||
);
|
||||
|
||||
const context = {
|
||||
extracted_nodes: extractedNodesContext,
|
||||
episode_content: episode ? episode.content : "",
|
||||
previous_episodes: previousEpisodes
|
||||
? previousEpisodes.map((ep) => ep.content)
|
||||
: [],
|
||||
};
|
||||
|
||||
const messages = dedupeNodes(context);
|
||||
|
||||
let responseText = "";
|
||||
|
||||
await this.makeModelCall(
|
||||
false,
|
||||
LLMModelEnum.GPT41,
|
||||
messages as CoreMessage[],
|
||||
(text) => {
|
||||
responseText = text;
|
||||
},
|
||||
);
|
||||
|
||||
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
|
||||
if (outputMatch && outputMatch[1]) {
|
||||
responseText = outputMatch[1].trim();
|
||||
const parsedResponse = JSON.parse(responseText);
|
||||
const nodeResolutions = parsedResponse.entity_resolutions || [];
|
||||
|
||||
// Process each node resolution to either map to an existing node or keep as new
|
||||
const resolvedNodes = nodeResolutions.map((resolution: any) => {
|
||||
const resolutionId = resolution.id ?? -1;
|
||||
const duplicateIdx = resolution.duplicate_idx ?? -1;
|
||||
const extractedNode = extractedNodes[resolutionId];
|
||||
|
||||
// If a duplicate was found, use the existing node, otherwise use the extracted node
|
||||
const resolvedNode =
|
||||
duplicateIdx >= 0 &&
|
||||
duplicateIdx < existingNodesLists[resolutionId]?.length
|
||||
? existingNodesLists[resolutionId][duplicateIdx]
|
||||
: extractedNode;
|
||||
|
||||
// Update the name if provided in the resolution
|
||||
if (resolution.name) {
|
||||
resolvedNode.name = resolution.name;
|
||||
}
|
||||
|
||||
// Map the extracted UUID to the resolved UUID
|
||||
uuidMap.set(extractedNode.uuid, resolvedNode.uuid);
|
||||
|
||||
return resolvedNode;
|
||||
});
|
||||
|
||||
return { resolvedNodes, uuidMap };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -388,18 +442,16 @@ export class KnowledgeGraphService {
|
||||
content: ep.content,
|
||||
createdAt: ep.createdAt.toISOString(),
|
||||
})),
|
||||
nodes: resolvedNodes.map((node) => ({
|
||||
entities: resolvedNodes.map((node) => ({
|
||||
name: node.name,
|
||||
type: node.type,
|
||||
uuid: node.uuid,
|
||||
})),
|
||||
referenceTime: episode.validAt.toISOString(),
|
||||
relationshipTypes: {}, // Could be populated with relationship definitions
|
||||
};
|
||||
|
||||
// Get the statement extraction prompt from the prompt library
|
||||
// Note: You might need to update your prompts to extract subject-predicate-object patterns
|
||||
const messages = promptLibrary.extractEdges.edge.call(context);
|
||||
const messages = extract_statements(context);
|
||||
|
||||
let responseText = "";
|
||||
|
||||
@ -413,65 +465,90 @@ export class KnowledgeGraphService {
|
||||
);
|
||||
|
||||
// Parse the statements from the LLM response
|
||||
// This will need to be updated based on your prompt format
|
||||
const extractedTriples = JSON.parse(responseText || "{}").edges || [];
|
||||
|
||||
// Convert to Triple objects with Statement nodes
|
||||
const triples: Triple[] = [];
|
||||
// Convert extracted triples to Triple objects with Statement nodes
|
||||
const triples = await Promise.all(
|
||||
// Fix: Type 'any'.
|
||||
extractedTriples.map(async (triple: any) => {
|
||||
// Find the subject and object nodes
|
||||
const subjectNode = resolvedNodes.find(
|
||||
(node) => node.name.toLowerCase() === triple.source.toLowerCase(),
|
||||
);
|
||||
|
||||
for (const triple of extractedTriples) {
|
||||
const subjectNode = resolvedNodes.find(
|
||||
(node) =>
|
||||
node.name.toLowerCase() === triple.sourceEntityName.toLowerCase(),
|
||||
);
|
||||
const objectNode = resolvedNodes.find(
|
||||
(node) => node.name.toLowerCase() === triple.target.toLowerCase(),
|
||||
);
|
||||
|
||||
const objectNode = resolvedNodes.find(
|
||||
(node) =>
|
||||
node.name.toLowerCase() === triple.targetEntityName.toLowerCase(),
|
||||
);
|
||||
|
||||
// Find or create a predicate node for the relationship type
|
||||
const predicateNode = resolvedNodes.find(
|
||||
(node) =>
|
||||
node.name.toLowerCase() === triple.relationshipType.toLowerCase(),
|
||||
) || {
|
||||
uuid: crypto.randomUUID(),
|
||||
name: triple.relationshipType,
|
||||
type: "Predicate",
|
||||
attributes: {},
|
||||
nameEmbedding: [], // Will be populated later
|
||||
createdAt: new Date(),
|
||||
userId: episode.userId,
|
||||
};
|
||||
|
||||
if (subjectNode && objectNode) {
|
||||
// Generate embedding for the fact
|
||||
const factEmbedding = await this.getEmbedding(triple.fact);
|
||||
|
||||
// Create a statement node
|
||||
const statement: StatementNode = {
|
||||
// Find or create a predicate node for the relationship type
|
||||
const predicateNode = resolvedNodes.find(
|
||||
(node) =>
|
||||
node.name.toLowerCase() === triple.relationship.toLowerCase(),
|
||||
) || {
|
||||
uuid: crypto.randomUUID(),
|
||||
fact: triple.fact,
|
||||
groupId: crypto.randomUUID().slice(0, 8), // Could be used to group related statements
|
||||
name: triple.relationship,
|
||||
type: "Predicate",
|
||||
attributes: {},
|
||||
nameEmbedding: await this.getEmbedding(triple.relationship),
|
||||
createdAt: new Date(),
|
||||
validAt: triple.validAt ? new Date(triple.validAt) : episode.validAt,
|
||||
invalidAt: triple.invalidAt ? new Date(triple.invalidAt) : undefined,
|
||||
attributesJson: JSON.stringify({}), // Could store additional metadata
|
||||
embedding: factEmbedding,
|
||||
userId: episode.userId,
|
||||
};
|
||||
|
||||
triples.push({
|
||||
statement,
|
||||
subject: subjectNode,
|
||||
predicate: predicateNode,
|
||||
object: objectNode,
|
||||
provenance: episode,
|
||||
});
|
||||
}
|
||||
if (subjectNode && objectNode) {
|
||||
// Create a statement node
|
||||
const statement: StatementNode = {
|
||||
uuid: crypto.randomUUID(),
|
||||
fact: triple.fact,
|
||||
factEmbedding: await this.getEmbedding(triple.fact),
|
||||
createdAt: new Date(),
|
||||
validAt: episode.validAt,
|
||||
invalidAt: null,
|
||||
attributes: triple.attributes || {},
|
||||
userId: episode.userId,
|
||||
};
|
||||
|
||||
return {
|
||||
statement,
|
||||
subject: subjectNode,
|
||||
predicate: predicateNode,
|
||||
object: objectNode,
|
||||
provenance: episode,
|
||||
};
|
||||
}
|
||||
return null;
|
||||
}),
|
||||
);
|
||||
|
||||
// Filter out null values (where subject or object wasn't found)
|
||||
return triples.filter(Boolean) as Triple[];
|
||||
}
|
||||
|
||||
private async resolvePredicateNodes(
|
||||
triples: Triple[],
|
||||
episode: EpisodicNode,
|
||||
) {
|
||||
const predicateNodes: EntityNode[] = triples.map((triple: Triple) => {
|
||||
return triple.predicate;
|
||||
});
|
||||
|
||||
if (predicateNodes.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
return triples;
|
||||
const existingNodesLists = await Promise.all(
|
||||
predicateNodes.map(async (predicateNode) => {
|
||||
// Check if a similar node already exists in HelixDB
|
||||
// Use vector similarity search to find similar entities
|
||||
// Threshold is 0.85 - meaning at least 85% similarity (lower cosine distance)
|
||||
const similarEntities = await helixClient.query("findSimilarEntities", {
|
||||
queryEmbedding: predicateNode.nameEmbedding,
|
||||
limit: 5, // Get top 5 matches
|
||||
threshold: 0.85, // 85% similarity threshold
|
||||
});
|
||||
|
||||
return similarEntities.nodes;
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -3,6 +3,6 @@
|
||||
*/
|
||||
|
||||
// Export types from individual prompt modules
|
||||
export { type ExtractedEntity, type ExtractedEntities } from "./extractNodes";
|
||||
export { type ExtractedEntity, type ExtractedEntities } from "./nodes";
|
||||
export { type Edge, type ExtractedEdges } from "./extractEdges";
|
||||
export { type ContradictionResult } from "./contradiction";
|
||||
|
||||
@ -34,21 +34,13 @@ export interface EntityClassification {
|
||||
export const extract_message = (
|
||||
context: Record<string, any>,
|
||||
): CoreMessage[] => {
|
||||
const sysPrompt = `You are an AI assistant that extracts entity nodes from conversational messages.
|
||||
Your primary task is to extract and classify significant entities mentioned in the conversation.`;
|
||||
const sysPrompt = `You are an AI assistant that extracts entity nodes from conversational messages for a reified knowledge graph.
|
||||
Your primary task is to extract and classify significant entities mentioned in the conversation.
|
||||
|
||||
const userPrompt = `
|
||||
<PREVIOUS EPISODES>
|
||||
${JSON.stringify(context.previousEpisodes || [], null, 2)}
|
||||
</PREVIOUS EPISODES>
|
||||
|
||||
<CURRENT EPISODE>
|
||||
${context.episodeContent}
|
||||
</CURRENT EPISODE>
|
||||
|
||||
<ENTITY TYPES>
|
||||
${JSON.stringify(context.entityTypes || {}, null, 2)}
|
||||
</ENTITY TYPES>
|
||||
In a reified knowledge graph, we need to identify subject and object entities that will be connected through statements.
|
||||
Focus on extracting:
|
||||
1. Subject entities (people, objects, concepts)
|
||||
2. Object entities (people, objects, concepts)
|
||||
|
||||
Instructions:
|
||||
|
||||
@ -63,14 +55,34 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
|
||||
- Assign the appropriate type for each one.
|
||||
|
||||
3. **Exclusions**:
|
||||
- Do NOT extract entities representing relationships or actions.
|
||||
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
|
||||
- Do NOT extract dates, times, or other temporal information—these will be handled separately.
|
||||
|
||||
4. **Formatting**:
|
||||
- Be **explicit and unambiguous** in naming entities (e.g., use full names when available).
|
||||
|
||||
${context.customPrompt || ""}
|
||||
`;
|
||||
|
||||
Format your response as a JSON object with the following structure:
|
||||
<output>
|
||||
{
|
||||
"entities": [
|
||||
{
|
||||
"name": "Entity Name",
|
||||
"type": "Entity Type",
|
||||
}
|
||||
// Additional entities...
|
||||
]
|
||||
}
|
||||
</output>`;
|
||||
|
||||
const userPrompt = `
|
||||
<PREVIOUS EPISODES>
|
||||
${JSON.stringify(context.previousEpisodes || [], null, 2)}
|
||||
</PREVIOUS EPISODES>
|
||||
|
||||
<CURRENT EPISODE>
|
||||
${context.episodeContent}
|
||||
</CURRENT EPISODE>`;
|
||||
|
||||
return [
|
||||
{ role: "system", content: sysPrompt },
|
||||
@ -82,29 +94,50 @@ ${context.customPrompt || ""}
|
||||
* Extract entities from text-based content
|
||||
*/
|
||||
export const extract_text = (context: Record<string, any>): CoreMessage[] => {
|
||||
const sysPrompt = `You are an AI assistant that extracts entity nodes from text.
|
||||
Your primary task is to extract and classify the speaker and other significant entities mentioned in the provided text.`;
|
||||
const sysPrompt = `
|
||||
You are an AI assistant that extracts entity nodes from text for a reified knowledge graph.
|
||||
Your primary task is to extract and classify significant entities mentioned in the provided text.
|
||||
|
||||
In a reified knowledge graph, we need to identify subject and object entities that will be connected through statements.
|
||||
Focus on extracting:
|
||||
1. Subject entities (people, objects, concepts)
|
||||
2. Object entities (people, objects, concepts)
|
||||
|
||||
Instructions:
|
||||
|
||||
You are given a TEXT. Your task is to extract **entity nodes** mentioned **explicitly or implicitly** in the TEXT.
|
||||
|
||||
1. **Entity Identification**:
|
||||
- Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the TEXT.
|
||||
|
||||
2. **Entity Classification**:
|
||||
- Use the descriptions in ENTITY TYPES to classify each extracted entity.
|
||||
- Assign the appropriate type for each one.
|
||||
|
||||
3. **Exclusions**:
|
||||
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
|
||||
- Do NOT extract dates, times, or other temporal information—these will be handled separately.
|
||||
|
||||
4. **Formatting**:
|
||||
- Be **explicit and unambiguous** in naming entities (e.g., use full names when available).
|
||||
|
||||
|
||||
Format your response as a JSON object with the following structure:
|
||||
<output>
|
||||
{
|
||||
"entities": [
|
||||
{
|
||||
"name": "Entity Name",
|
||||
"type": "Entity Type"
|
||||
}
|
||||
// Additional entities...
|
||||
]
|
||||
}
|
||||
</output>`;
|
||||
const userPrompt = `
|
||||
<TEXT>
|
||||
${context.episodeContent}
|
||||
</TEXT>
|
||||
|
||||
<ENTITY TYPES>
|
||||
${JSON.stringify(context.entityTypes || {}, null, 2)}
|
||||
</ENTITY TYPES>
|
||||
|
||||
Given the above text, extract entities from the TEXT that are explicitly or implicitly mentioned.
|
||||
For each entity extracted, also determine its entity type based on the provided ENTITY TYPES and their descriptions.
|
||||
Indicate the classified entity type by providing its entity_type_id.
|
||||
|
||||
${context.customPrompt || ""}
|
||||
|
||||
Guidelines:
|
||||
1. Extract significant entities, concepts, or actors mentioned in the conversation.
|
||||
2. Avoid creating nodes for relationships or actions.
|
||||
3. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
|
||||
4. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
|
||||
`;
|
||||
|
||||
return [
|
||||
@ -218,3 +251,82 @@ ${JSON.stringify(context.node, null, 2)}
|
||||
},
|
||||
];
|
||||
};
|
||||
|
||||
/**
 * Build the LLM messages used to resolve entity duplications.
 *
 * The system message explains the duplicate-detection task and the expected
 * `<output>`-wrapped JSON response (`entity_resolutions`); the user message
 * carries the previous episodes, the current episode and the extracted
 * entities together with their duplication candidates.
 *
 * @param context - Prompt inputs. Read keys:
 *   - `extracted_nodes`: entities (with `duplication_candidates`) to resolve.
 *   - `episodeContent` or `episode_content`: text of the current episode.
 *   - `previousEpisodes` or `previous_episodes`: earlier episodes for context.
 * @returns A two-element message list: system prompt, then user prompt.
 */
export const dedupeNodes = (context: Record<string, any>): CoreMessage[] => {
  // The node resolver in the knowledge-graph service builds this context with
  // snake_case keys (`episode_content`, `previous_episodes`), while the other
  // prompt builders use camelCase. Accept both so the episode text actually
  // reaches the model instead of rendering as "undefined" / "[]".
  const previousEpisodes =
    context.previousEpisodes || context.previous_episodes || [];
  const episodeContent =
    context.episodeContent ?? context.episode_content ?? "";
  // JSON.stringify(undefined) would otherwise print the word "undefined".
  const extractedNodes = context.extracted_nodes ?? [];

  // NOTE(review): the prompt documents the candidate field as `entity_type`;
  // confirm callers send that key rather than a differently named one.
  return [
    {
      role: "system",
      content: `You are a helpful assistant who determines whether or not ENTITIES extracted from a conversation are duplicates of existing entities.

Each entity in ENTITIES is represented as a JSON object with the following structure:
{
id: integer id of the entity,
name: "name of the entity",
entity_type: "ontological classification of the entity",
entity_type_description: "Description of what the entity type represents",
duplication_candidates: [
{
idx: integer index of the candidate entity,
name: "name of the candidate entity",
entity_type: "ontological classification of the candidate entity",
...<additional attributes>
}
]
}

For each of the above ENTITIES, determine if the entity is a duplicate of any of its duplication candidates.
Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
Do NOT mark entities as duplicates if:
- They are related but distinct.
- They have similar names or purposes but refer to separate instances or concepts.

Task:
Your response must be a JSON object with an "entity_resolutions" array containing one entry for each entity.

For each entity, include:
- "id": the id of the entity (integer)
- "name": the name of the entity (string)
- "duplicate_idx": the index of the duplicate candidate, or -1 if no duplicate (integer)

Format your response as follows:
<output>
{
"entity_resolutions": [
{
"id": 0,
"name": "Entity Name",
"duplicate_idx": -1
},
// Additional entity resolutions...
]
}
</output>

Notes:
- If an entity is a duplicate of one of its duplication_candidates, set duplicate_idx to the idx of that candidate.
- If an entity is not a duplicate of any candidate, set duplicate_idx to -1.
- Always include all entities from the input in your response.
- Always wrap the output in these tags <output> </output>
`,
    },
    {
      role: "user",
      content: `
<PREVIOUS EPISODES>
${JSON.stringify(previousEpisodes, null, 2)}
</PREVIOUS EPISODES>

<CURRENT EPISODE>
${episodeContent}
</CURRENT EPISODE>

<ENTITIES>
${JSON.stringify(extractedNodes, null, 2)}
</ENTITIES>
`,
    },
  ];
};
|
||||
243
apps/webapp/app/services/prompts/statements.ts
Normal file
243
apps/webapp/app/services/prompts/statements.ts
Normal file
@ -0,0 +1,243 @@
|
||||
import { type CoreMessage } from "ai";
|
||||
import { type Triple } from "../knowledgeGraph.server";
|
||||
|
||||
/**
 * Build the LLM messages that extract statements (subject-predicate-object
 * triples) from episode content for a reified knowledge graph, where each
 * statement is represented as a first-class node.
 *
 * The system message asks for an `<output>`-wrapped JSON object with an
 * `edges` array (`source`, `relationship`, `target`, `fact`, `attributes`).
 *
 * @param context - Prompt inputs. Read keys:
 *   - `episodeContent`: text of the current episode.
 *   - `previousEpisodes`: earlier episodes (defaults to an empty list).
 *   - `entities`: entities allowed as subjects/objects (defaults to an empty list).
 * @returns A two-element message list: system prompt, then user prompt.
 */
export const extract_statements = (
  context: Record<string, any>,
): CoreMessage[] => {
  // JSON.stringify(undefined) yields undefined, which a template literal
  // prints as the word "undefined". Default the list inputs, as the sibling
  // prompt builders do, so a missing key renders as an empty JSON array.
  const previousEpisodes = context.previousEpisodes ?? [];
  const availableEntities = context.entities ?? [];

  return [
    {
      role: "system",
      content: `You are a knowledge graph expert that extracts factual statements from text as subject-predicate-object triples.
Your task is to identify important facts and represent them in a reified knowledge graph model
where each statement is a first-class node connected to subject, predicate, and object entities.

I need to extract factual statements from the following conversation/text and represent them in a reified knowledge graph.

Follow these instructions carefully:

1. Identify key factual statements from the episode content and previous episodes
2. Represent each statement as a subject-predicate-object triple
3. Only use entities from the AVAILABLE ENTITIES list as subjects and objects
4. For each statement, provide:
- The subject entity name (must match exactly one from AVAILABLE ENTITIES)
- The predicate/relationship (a clear, concise verb or relationship type)
- The object entity name (must match exactly one from AVAILABLE ENTITIES)
- A natural language fact that accurately represents the triple
- Any additional attributes relevant to the relationship

IMPORTANT ABOUT TEMPORAL INFORMATION:
- The system tracks when facts become known (validAt) and contradicted (invalidAt) separately
- You must include any temporal information WITHIN the fact statement itself
- For example, if someone worked at a company from 2015-2020, include this in the "fact" field and "attributes.timespan" field
- Do NOT omit temporal information from facts - it's critical context
- Examples of good temporal facts:
* "John worked at Google from 2015 to 2020"
* "Sarah lived in New York until 2018"
* "The project was completed on March 15, 2023"

Format your response as a JSON object with the following structure:
<output>
{
"edges": [
{
"source": "[Subject Entity Name]",
"relationship": "[Predicate/Relationship Type]",
"target": "[Object Entity Name]",
"fact": "[Natural language representation of the fact INCLUDING any temporal information]",
"attributes": {
"confidence": 0.9, // How confident you are in this fact (0-1)
"source": "explicit", // Whether the fact was explicitly stated or inferred
"timespan": { // Include if the fact has a specific time period
"start": "2015", // When the fact started being true (if known)
"end": "2020" // When the fact stopped being true (if known)
}
}
},
// Additional statements...
]
}
</output>

Important guidelines:
- Only include the most significant and factual statements
- Do not invent entities not present in the AVAILABLE ENTITIES list
- Be precise in representing the relationships
- Each fact should be atomic (representing a single piece of information)
- ALWAYS include temporal information when available (dates, periods, etc.) in both the fact text AND attributes
- Facts should be based on the episode content, not general knowledge
- Aim for quality over quantity, prioritize clear, unambiguous statements
- For ongoing facts (still true), omit the "end" field in timespan`,
    },
    {
      role: "user",
      content: `
<EPISODE_CONTENT>
${context.episodeContent}
</EPISODE_CONTENT>

<PREVIOUS_EPISODES>
${JSON.stringify(previousEpisodes, null, 2)}
</PREVIOUS_EPISODES>

<AVAILABLE_ENTITIES>
${JSON.stringify(availableEntities, null, 2)}
</AVAILABLE_ENTITIES>
`,
    },
  ];
};
|
||||
|
||||
/**
 * Build the LLM messages that check a new statement against existing
 * statements for contradictions.
 *
 * @param context - Prompt inputs; reads `newStatement`, `existingStatements`
 *   and `referenceTime`.
 * @returns A two-element message list: system prompt, then user prompt.
 */
export const detect_contradictions = (
  context: Record<string, any>,
): CoreMessage[] => {
  const { newStatement, existingStatements, referenceTime } = context;

  // Role description for the model, assembled from its three sentences-parts.
  const systemPrompt = [
    "You are a knowledge graph reasoning expert that identifies contradictions between statements.",
    "Your task is to analyze pairs of statements and determine if they contradict each other",
    "based on their temporal validity and factual content.",
  ].join(" ");

  // Task description plus the statements under comparison.
  const userPrompt = `
I need to detect contradictions between statements in a temporal knowledge graph.

<NEW STATEMENT>
${newStatement}
</NEW STATEMENT>

<EXISTING STATEMENTS>
${JSON.stringify(existingStatements, null, 2)}
</EXISTING STATEMENTS>

<REFERENCE TIME>
${referenceTime}
</REFERENCE TIME>

Determine if the NEW STATEMENT contradicts any of the EXISTING STATEMENTS.
A contradiction occurs when:

1. Two statements assert incompatible facts about the same subject-predicate pair
2. The statements overlap in their temporal validity periods

For example, if one statement says "John works at Company A from January 2023" and another says
"John works at Company B from March 2023", these would contradict if a person can only work at one
company at a time.

Format your response as a JSON object with the following structure:
{
"hasContradiction": true/false,
"contradictedStatements": [
{
"statementId": "[ID of the contradicted statement]",
"reason": "[Explanation of why these statements contradict]",
"temporalRelationship": "[overlapping/containing/contained/after/before]"
}
]
}

Important guidelines:
- Consider the temporal validity of statements
- Only mark as contradictions if statements are truly incompatible
- Provide clear reasoning for each identified contradiction
- Consider the context and domain constraints
- If no contradictions exist, return an empty contradictedStatements array
`;

  return [
    { role: "system", content: systemPrompt },
    { role: "user", content: userPrompt },
  ];
};
|
||||
|
||||
/**
 * Build the LLM messages that compare new statements with semantically
 * similar existing statements to decide, per new statement, whether it is a
 * duplicate and which existing statements it contradicts.
 *
 * The system message asks for an `<output>`-wrapped JSON array with
 * `statementId`, `isDuplicate`, `duplicateId` and `contradictions`.
 *
 * @param context - Prompt inputs. Read keys:
 *   - `newStatements`: triples to analyse (defaults to an empty list).
 *   - `similarStatements`: candidate existing statements (defaults to an empty list).
 *   - `episodeContent`: text of the current episode.
 *   - `referenceTime`: reference timestamp for temporal reasoning.
 * @returns A two-element message list: system prompt, then user prompt.
 */
export const resolve_statements = (
  context: Record<string, any>,
): CoreMessage[] => {
  // Subject/predicate/object are entity objects; interpolating them directly
  // prints "[object Object]". Render the entity name instead, and pass plain
  // strings through unchanged in case a caller already supplies names.
  const entityLabel = (entity: any): string => {
    if (entity == null) {
      return "";
    }
    if (typeof entity === "string") {
      return entity;
    }
    return typeof entity.name === "string"
      ? entity.name
      : JSON.stringify(entity);
  };

  const newStatements: Triple[] = context.newStatements ?? [];
  // Avoid rendering the literal word "undefined" when no candidates are given.
  const similarStatements = context.similarStatements ?? [];

  const newStatementsText = newStatements
    .map(
      (triple: Triple) => `
StatementId: ${triple.statement.uuid}
Fact: ${triple.statement.fact}
Subject: ${entityLabel(triple.subject)}
Predicate: ${entityLabel(triple.predicate)}
Object: ${entityLabel(triple.object)}
---------------------------
`,
    )
    .join("");

  return [
    {
      role: "system",
      content: `You are a knowledge graph expert that analyzes statements to detect duplications and contradictions.
You analyze multiple new statements against existing statements to determine whether the new statement duplicates any existing statement or contradicts any existing statement.
Pay special attention to temporal aspects, event updates, and context changes. If an event changes (like a date shift), statements about the original event are likely contradicted by statements about the updated event.


I need to analyze whether a new statement duplicates or contradicts existing statements in a knowledge graph.


Follow these instructions carefully:

1. Analyze if the new statement is a semantic duplicate of any existing statement
- Two statements are duplicates if they express the same meaning even with different wording
- Consider entity resolution has already been done, so different entity names are NOT an issue

2. Determine if the new statement contradicts any existing valid statements
- Contradictions occur when statements cannot both be true at the same time
- Pay special attention to negations, opposites, and mutually exclusive facts
- Consider temporal validity - statements may only be contradictions within specific time periods

3. IMPORTANT: For events that change (like rescheduled appointments, moved dates, changed locations):
- When an event changes date/time/location, new statements about the updated event likely contradict statements about the original event
- Look for contextual clues about event changes, cancellations, or rescheduling
- Example: If "Concert on June 10" moved to "Concert on June 12", then "John attends June 10 concert" contradicts "John doesn't attend June 12 concert"

4. Format your response as a JSON object with the following structure:
<output>
[{
"statementId": "new_statement_uuid",
"isDuplicate": true/false,
"duplicateId": "existing_statement_uuid-if-duplicate-exists",
"contradictions": ["existing_statement_uuid-1", "existing_statement_uuid-2"], // UUIDs of any contradicted statements
}]
</output>

Important guidelines:
- If the new statement is a duplicate, include the UUID of the duplicate statement
- For contradictions, list all statement UUIDs that the new statement contradicts
- If a statement is both a contradiction AND a duplicate (rare case), mark it as a duplicate
- Identify temporal and contextual shifts that may create implicit contradictions
- Don't give any reason, just give the final output.
`,
    },
    {
      role: "user",
      content: `
<NEW_STATEMENTS>
${newStatementsText}
</NEW_STATEMENTS>

<SIMILAR_STATEMENTS>
${JSON.stringify(similarStatements, null, 2)}
</SIMILAR_STATEMENTS>

<EPISODE_CONTENT>
${context.episodeContent}
</EPISODE_CONTENT>

<REFERENCE_TIME>
${context.referenceTime}
</REFERENCE_TIME> `,
    },
  ];
};
|
||||
Loading…
x
Reference in New Issue
Block a user