refactor: implement statement extraction and resolution

2026-01-10 23:48:26 +00:00 · 2025-06-03 19:20:56 +05:30 · 2025-06-03 19:20:56 +05:30 · 96d829642b
commit 96d829642b
parent 0853a30897
5 changed files with 579 additions and 147 deletions
--- a/apps/webapp/app/services/knowledgeGraph.server.ts
+++ b/apps/webapp/app/services/knowledgeGraph.server.ts
@ -10,7 +10,8 @@ import {
 import { LLMMappings, LLMModelEnum } from "@recall/types";
 import { logger } from "./logger.service";
 import crypto from "crypto";
-import { extract_message, extract_text } from "./prompts/extractNodes";
+import { dedupeNodes, extract_message, extract_text } from "./prompts/nodes";
+import { extract_statements } from "./prompts/statements";

 export enum EpisodeType {
  Conversation = "CONVERSATION",
@ -41,7 +42,7 @@ export interface EpisodicNode {
 * Entities represent subjects, objects, or predicates in statements
 */
 export interface EntityNode {
-  uuid?: string;
+  uuid: string;
  name: string;
  type: string;
  attributes: Record<string, any>;
@ -211,8 +212,8 @@ export class KnowledgeGraphService {

      // Step 5: Statement Extraction - Extract statements (triples) instead of direct edges
      const extractedStatements = await this.extractStatements(
-        resolvedNodes,
        episode,
+        resolvedNodes,
        previousEpisodes,
      );

@ -292,21 +293,27 @@ export class KnowledgeGraphService {
      },
    );

-    const extractedEntities = JSON.parse(responseText || "{}").entities || [];
-
    // Convert to EntityNode objects
    const entities: EntityNode[] = [];

-    for (const entity of extractedEntities) {
-      entities.push({
-        uuid: crypto.randomUUID(),
-        name: entity.name,
-        type: entity.type,
-        attributes: entity.attributes || {},
-        nameEmbedding: [], // Will be populated later
-        createdAt: new Date(),
-        userId: episode.userId,
-      });
+    const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
+    if (outputMatch && outputMatch[1]) {
+      responseText = outputMatch[1].trim();
+      const extractedEntities = JSON.parse(responseText || "{}").entities || [];
+
+      entities.push(
+        ...(await Promise.all(
+          extractedEntities.map(async (entity: any) => ({
+            uuid: crypto.randomUUID(),
+            name: entity.name,
+            type: entity.type,
+            attributes: entity.attributes || {},
+            nameEmbedding: await this.getEmbedding(entity.name),
+            createdAt: new Date(),
+            userId: episode.userId,
+          })),
+        )),
+      );
    }

    return entities;
@ -321,55 +328,102 @@ export class KnowledgeGraphService {
    previousEpisodes: EpisodicNode[],
  ): Promise<{ resolvedNodes: EntityNode[]; uuidMap: Map<string, string> }> {
    const uuidMap = new Map<string, string>();
-    const resolvedNodes: EntityNode[] = [];

-    for (const extractedNode of extractedNodes) {
-      // Generate embedding for the node name
-      const nameEmbedding = await this.getEmbedding(extractedNode.name);
+    const existingNodesLists = await Promise.all(
+      extractedNodes.map(async (extractedNode) => {
+        // Check if a similar node already exists in HelixDB
+        // Use vector similarity search to find similar entities
+        // Threshold is 0.85 - meaning at least 85% similarity (lower cosine distance)
+        const similarEntities = await helixClient.query("findSimilarEntities", {
+          queryEmbedding: extractedNode.nameEmbedding,
+          limit: 5, // Get top 5 matches
+          threshold: 0.85, // 85% similarity threshold
+        });

-      // Check if a similar node already exists in HelixDB
-      // Use vector similarity search to find similar entities
-      // Threshold is 0.85 - meaning at least 85% similarity (lower cosine distance)
-      const similarEntities = await helixClient.query("findSimilarEntities", {
-        queryEmbedding: nameEmbedding,
-        limit: 5, // Get top 5 matches
-        threshold: 0.85, // 85% similarity threshold
+        return similarEntities.nodes;
+      }),
+    );
+
+    if (!existingNodesLists || existingNodesLists.length === 0) {
+      extractedNodes.forEach((node) => {
+        uuidMap.set(node.uuid, node.uuid);
      });
-
-      const existingNodes = similarEntities.nodes;
-
-      // Get entity types dictionary or empty object if not provided
-      const entityTypesDict = entity_types || {};
-
-      if (similarEntities.length > 0) {
-        // If similar nodes exist, we need to decide if we want to merge with an existing one
-        // This could involve LLM to determine if they're the same entity
-        const existingNode = similarEntities[0];
-
-        // Map the extracted node UUID to the existing node UUID
-        uuidMap.set(extractedNode.uuid, existingNode.uuid);
-
-        // Add the existing node to our resolved nodes if not already present
-        if (!resolvedNodes.some((node) => node.uuid === existingNode.uuid)) {
-          resolvedNodes.push({
-            uuid: existingNode.uuid,
-            name: existingNode.name,
-            type: existingNode.type,
-            attributes: existingNode.attributes || {},
-            nameEmbedding: existingNode.nameEmbedding,
-            createdAt: new Date(existingNode.createdAt),
-            userId: existingNode.userId,
-          });
-        }
-      } else {
-        // This is a new entity, add embedding and keep as is
-        extractedNode.nameEmbedding = nameEmbedding;
-        resolvedNodes.push(extractedNode);
-        uuidMap.set(extractedNode.uuid, extractedNode.uuid);
-      }
+      return { resolvedNodes: extractedNodes, uuidMap };
    }

-    return { resolvedNodes, uuidMap };
+    // Prepare context for LLM
+    const extractedNodesContext = extractedNodes.map(
+      (node: EntityNode, i: number) => {
+        return {
+          id: i,
+          name: node.name,
+          entity_type: node.type,
+          entity_type_description: "Default Entity Type",
+          duplication_candidates: existingNodesLists[i].map(
+            (candidate: EntityNode, j: number) => ({
+              idx: j,
+              name: candidate.name,
+              entity_types: candidate.type,
+              ...candidate.attributes,
+            }),
+          ),
+        };
+      },
+    );
+
+    const context = {
+      extracted_nodes: extractedNodesContext,
+      episode_content: episode ? episode.content : "",
+      previous_episodes: previousEpisodes
+        ? previousEpisodes.map((ep) => ep.content)
+        : [],
+    };
+
+    const messages = dedupeNodes(context);
+
+    let responseText = "";
+
+    await this.makeModelCall(
+      false,
+      LLMModelEnum.GPT41,
+      messages as CoreMessage[],
+      (text) => {
+        responseText = text;
+      },
+    );
+
+    const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
+    if (outputMatch && outputMatch[1]) {
+      responseText = outputMatch[1].trim();
+      const parsedResponse = JSON.parse(responseText);
+      const nodeResolutions = parsedResponse.entity_resolutions || [];
+
+      // Process each node resolution to either map to an existing node or keep as new
+      const resolvedNodes = nodeResolutions.map((resolution: any) => {
+        const resolutionId = resolution.id ?? -1;
+        const duplicateIdx = resolution.duplicate_idx ?? -1;
+        const extractedNode = extractedNodes[resolutionId];
+
+        // If a duplicate was found, use the existing node, otherwise use the extracted node
+        const resolvedNode =
+          duplicateIdx >= 0 &&
+          duplicateIdx < existingNodesLists[resolutionId]?.length
+            ? existingNodesLists[resolutionId][duplicateIdx]
+            : extractedNode;
+
+        // Update the name if provided in the resolution
+        if (resolution.name) {
+          resolvedNode.name = resolution.name;
+        }
+
+        // Map the extracted UUID to the resolved UUID
+        uuidMap.set(extractedNode.uuid, resolvedNode.uuid);
+
+        return resolvedNode;
+      });
+
+      return { resolvedNodes, uuidMap };
+    }
  }

  /**
@ -388,18 +442,16 @@ export class KnowledgeGraphService {
        content: ep.content,
        createdAt: ep.createdAt.toISOString(),
      })),
-      nodes: resolvedNodes.map((node) => ({
+      entities: resolvedNodes.map((node) => ({
        name: node.name,
        type: node.type,
        uuid: node.uuid,
      })),
      referenceTime: episode.validAt.toISOString(),
-      relationshipTypes: {}, // Could be populated with relationship definitions
    };

    // Get the statement extraction prompt from the prompt library
-    // Note: You might need to update your prompts to extract subject-predicate-object patterns
-    const messages = promptLibrary.extractEdges.edge.call(context);
+    const messages = extract_statements(context);

    let responseText = "";

@ -413,65 +465,90 @@ export class KnowledgeGraphService {
    );

    // Parse the statements from the LLM response
-    // This will need to be updated based on your prompt format
    const extractedTriples = JSON.parse(responseText || "{}").edges || [];

-    // Convert to Triple objects with Statement nodes
-    const triples: Triple[] = [];
+    // Convert extracted triples to Triple objects with Statement nodes
+    const triples = await Promise.all(
+      // Fix: Type 'any'.
+      extractedTriples.map(async (triple: any) => {
+        // Find the subject and object nodes
+        const subjectNode = resolvedNodes.find(
+          (node) => node.name.toLowerCase() === triple.source.toLowerCase(),
+        );

-    for (const triple of extractedTriples) {
-      const subjectNode = resolvedNodes.find(
-        (node) =>
-          node.name.toLowerCase() === triple.sourceEntityName.toLowerCase(),
-      );
+        const objectNode = resolvedNodes.find(
+          (node) => node.name.toLowerCase() === triple.target.toLowerCase(),
+        );

-      const objectNode = resolvedNodes.find(
-        (node) =>
-          node.name.toLowerCase() === triple.targetEntityName.toLowerCase(),
-      );
-
-      // Find or create a predicate node for the relationship type
-      const predicateNode = resolvedNodes.find(
-        (node) =>
-          node.name.toLowerCase() === triple.relationshipType.toLowerCase(),
-      ) || {
-        uuid: crypto.randomUUID(),
-        name: triple.relationshipType,
-        type: "Predicate",
-        attributes: {},
-        nameEmbedding: [], // Will be populated later
-        createdAt: new Date(),
-        userId: episode.userId,
-      };
-
-      if (subjectNode && objectNode) {
-        // Generate embedding for the fact
-        const factEmbedding = await this.getEmbedding(triple.fact);
-
-        // Create a statement node
-        const statement: StatementNode = {
+        // Find or create a predicate node for the relationship type
+        const predicateNode = resolvedNodes.find(
+          (node) =>
+            node.name.toLowerCase() === triple.relationship.toLowerCase(),
+        ) || {
          uuid: crypto.randomUUID(),
-          fact: triple.fact,
-          groupId: crypto.randomUUID().slice(0, 8), // Could be used to group related statements
+          name: triple.relationship,
+          type: "Predicate",
+          attributes: {},
+          nameEmbedding: await this.getEmbedding(triple.relationship),
          createdAt: new Date(),
-          validAt: triple.validAt ? new Date(triple.validAt) : episode.validAt,
-          invalidAt: triple.invalidAt ? new Date(triple.invalidAt) : undefined,
-          attributesJson: JSON.stringify({}), // Could store additional metadata
-          embedding: factEmbedding,
          userId: episode.userId,
        };

-        triples.push({
-          statement,
-          subject: subjectNode,
-          predicate: predicateNode,
-          object: objectNode,
-          provenance: episode,
-        });
-      }
+        if (subjectNode && objectNode) {
+          // Create a statement node
+          const statement: StatementNode = {
+            uuid: crypto.randomUUID(),
+            fact: triple.fact,
+            factEmbedding: await this.getEmbedding(triple.fact),
+            createdAt: new Date(),
+            validAt: episode.validAt,
+            invalidAt: null,
+            attributes: triple.attributes || {},
+            userId: episode.userId,
+          };
+
+          return {
+            statement,
+            subject: subjectNode,
+            predicate: predicateNode,
+            object: objectNode,
+            provenance: episode,
+          };
+        }
+        return null;
+      }),
+    );
+
+    // Filter out null values (where subject or object wasn't found)
+    return triples.filter(Boolean) as Triple[];
+  }
+
+  /**
+   * For every predicate node referenced by the given triples, queries
+   * "findSimilarEntities" with the predicate's name embedding to collect
+   * existing entities that may be the same predicate.
+   *
+   * NOTE(review): the collected candidate lists are neither returned nor used,
+   * and `episode` is never read — this looks like an unfinished stub; confirm
+   * the intended follow-up (presumably an LLM dedupe step like the entity one).
+   *
+   * @param triples - Triples whose `predicate` nodes are looked up.
+   * @param episode - Not read by the current body.
+   * @returns Nothing; resolves once all similarity queries have completed.
+   */
+  private async resolvePredicateNodes(
+    triples: Triple[],
+    episode: EpisodicNode,
+  ) {
+    const predicateNodes: EntityNode[] = triples.map((triple: Triple) => {
+      return triple.predicate;
+    });
+
+    // No triples means no predicates to look up.
+    if (predicateNodes.length === 0) {
+      return;
    }

-    return triples;
+    // One similarity query per predicate, issued concurrently; the result keeps
+    // the same order as predicateNodes.
+    const existingNodesLists = await Promise.all(
+      predicateNodes.map(async (predicateNode) => {
+        // Check if a similar node already exists in HelixDB
+        // Use vector similarity search to find similar entities
+        // Threshold is 0.85 - meaning at least 85% similarity (lower cosine distance)
+        const similarEntities = await helixClient.query("findSimilarEntities", {
+          queryEmbedding: predicateNode.nameEmbedding,
+          limit: 5, // Get top 5 matches
+          threshold: 0.85, // 85% similarity threshold
+        });
+
+        return similarEntities.nodes;
+      }),
+    );
  }

  /**
--- a/apps/webapp/app/services/prompts/extractStatements.ts
+++ b/apps/webapp/app/services/prompts/extractStatements.ts
--- a/apps/webapp/app/services/prompts/index.ts
+++ b/apps/webapp/app/services/prompts/index.ts
@ -3,6 +3,6 @@
 */

 // Export types from individual prompt modules
-export { type ExtractedEntity, type ExtractedEntities } from "./extractNodes";
+export { type ExtractedEntity, type ExtractedEntities } from "./nodes";
 export { type Edge, type ExtractedEdges } from "./extractEdges";
 export { type ContradictionResult } from "./contradiction";
--- a/apps/webapp/app/services/prompts/extractNodes.ts
+++ b/apps/webapp/app/services/prompts/extractNodes.ts
@ -34,21 +34,13 @@ export interface EntityClassification {
 export const extract_message = (
  context: Record<string, any>,
 ): CoreMessage[] => {
-  const sysPrompt = `You are an AI assistant that extracts entity nodes from conversational messages. 
-Your primary task is to extract and classify significant entities mentioned in the conversation.`;
+  const sysPrompt = `You are an AI assistant that extracts entity nodes from conversational messages for a reified knowledge graph.
+Your primary task is to extract and classify significant entities mentioned in the conversation.

-  const userPrompt = `
-<PREVIOUS EPISODES>
-${JSON.stringify(context.previousEpisodes || [], null, 2)}
-</PREVIOUS EPISODES>
-
-<CURRENT EPISODE>
-${context.episodeContent}
-</CURRENT EPISODE>
-
-<ENTITY TYPES>
-${JSON.stringify(context.entityTypes || {}, null, 2)}
-</ENTITY TYPES>
+In a reified knowledge graph, we need to identify subject and object entities that will be connected through statements.
+Focus on extracting:
+1. Subject entities (people, objects, concepts)
+2. Object entities (people, objects, concepts)

 Instructions:

@ -63,14 +55,34 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
   - Assign the appropriate type for each one.

 3. **Exclusions**:
-   - Do NOT extract entities representing relationships or actions.
+   - Do NOT extract entities representing relationships or actions (predicates will be handled separately).
   - Do NOT extract dates, times, or other temporal information—these will be handled separately.

 4. **Formatting**:
   - Be **explicit and unambiguous** in naming entities (e.g., use full names when available).

-${context.customPrompt || ""}
-`;
+
+Format your response as a JSON object with the following structure:
+<output>
+{
+  "entities": [
+    {
+      "name": "Entity Name",
+      "type": "Entity Type",
+    }
+    // Additional entities...
+  ]
+}
+</output>`;
+
+  const userPrompt = `
+<PREVIOUS EPISODES>
+${JSON.stringify(context.previousEpisodes || [], null, 2)}
+</PREVIOUS EPISODES>
+
+<CURRENT EPISODE>
+${context.episodeContent}
+</CURRENT EPISODE>`;

  return [
    { role: "system", content: sysPrompt },
@ -82,29 +94,50 @@ ${context.customPrompt || ""}
 * Extract entities from text-based content
 */
 export const extract_text = (context: Record<string, any>): CoreMessage[] => {
-  const sysPrompt = `You are an AI assistant that extracts entity nodes from text. 
-Your primary task is to extract and classify the speaker and other significant entities mentioned in the provided text.`;
+  const sysPrompt = `
+  You are an AI assistant that extracts entity nodes from text for a reified knowledge graph.
+Your primary task is to extract and classify significant entities mentioned in the provided text.

+In a reified knowledge graph, we need to identify subject and object entities that will be connected through statements.
+Focus on extracting:
+1. Subject entities (people, objects, concepts)
+2. Object entities (people, objects, concepts)
+
+Instructions:
+
+You are given a TEXT. Your task is to extract **entity nodes** mentioned **explicitly or implicitly** in the TEXT.
+
+1. **Entity Identification**:
+   - Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the TEXT.
+
+2. **Entity Classification**:
+   - Use the descriptions in ENTITY TYPES to classify each extracted entity.
+   - Assign the appropriate type for each one.
+
+3. **Exclusions**:
+   - Do NOT extract entities representing relationships or actions (predicates will be handled separately).
+   - Do NOT extract dates, times, or other temporal information—these will be handled separately.
+
+4. **Formatting**:
+   - Be **explicit and unambiguous** in naming entities (e.g., use full names when available).
+
+
+Format your response as a JSON object with the following structure:
+<output>
+{
+  "entities": [
+    {
+      "name": "Entity Name",
+      "type": "Entity Type"
+    }
+    // Additional entities...
+  ]
+}
+</output>`;
  const userPrompt = `
 <TEXT>
 ${context.episodeContent}
 </TEXT>
-
-<ENTITY TYPES>
-${JSON.stringify(context.entityTypes || {}, null, 2)}
-</ENTITY TYPES>
-
-Given the above text, extract entities from the TEXT that are explicitly or implicitly mentioned.
-For each entity extracted, also determine its entity type based on the provided ENTITY TYPES and their descriptions.
-Indicate the classified entity type by providing its entity_type_id.
-
-${context.customPrompt || ""}
-
-Guidelines:
-1. Extract significant entities, concepts, or actors mentioned in the conversation.
-2. Avoid creating nodes for relationships or actions.
-3. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
-4. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
 `;

  return [
@ -218,3 +251,82 @@ ${JSON.stringify(context.node, null, 2)}
    },
  ];
 };
+
+/**
+ * Build the LLM messages used to decide whether newly extracted entities
+ * duplicate existing ones.
+ *
+ * The system message carries the fixed instructions and output format; the
+ * user message carries the previous episodes, the current episode text and
+ * the entities (each with its duplication candidates) serialized as JSON.
+ *
+ * @param context - Prompt inputs. Reads `extracted_nodes`, plus the episode
+ *   text and previous episodes under either camelCase (`episodeContent`,
+ *   `previousEpisodes`) or snake_case (`episode_content`, `previous_episodes`).
+ * @returns A two-element array: the system message followed by the user message.
+ */
+export const dedupeNodes = (context: Record<string, any>): CoreMessage[] => {
+  // The extraction prompts in this module read camelCase keys, but this
+  // prompt's own `extracted_nodes` key is snake_case and callers may spell the
+  // episode fields the same way. Accept both so the episode sections are not
+  // rendered as "[]" / "undefined" when the snake_case spelling is used.
+  const previousEpisodes =
+    context.previousEpisodes || context.previous_episodes || [];
+  const episodeContent =
+    context.episodeContent ?? context.episode_content ?? "";
+
+  return [
+    {
+      role: "system",
+      content: `You are a helpful assistant who determines whether or not ENTITIES extracted from a conversation are duplicates of existing entities.
+      
+Each entity in ENTITIES is represented as a JSON object with the following structure:
+{
+    id: integer id of the entity,
+    name: "name of the entity",
+    entity_type: "ontological classification of the entity",
+    entity_type_description: "Description of what the entity type represents",
+    duplication_candidates: [
+        {
+            idx: integer index of the candidate entity,
+            name: "name of the candidate entity",
+            entity_type: "ontological classification of the candidate entity",
+            ...<additional attributes>
+        }
+    ]
+}
+
+For each of the above ENTITIES, determine if the entity is a duplicate of any of its duplication candidates.
+Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
+Do NOT mark entities as duplicates if:
+- They are related but distinct.
+- They have similar names or purposes but refer to separate instances or concepts.
+
+Task:
+Your response must be a JSON object with an "entity_resolutions" array containing one entry for each entity.
+
+For each entity, include:
+- "id": the id of the entity (integer)
+- "name": the name of the entity (string)
+- "duplicate_idx": the index of the duplicate candidate, or -1 if no duplicate (integer)
+
+Format your response as follows:
+<output>
+{
+  "entity_resolutions": [
+    {
+      "id": 0,
+      "name": "Entity Name",
+      "duplicate_idx": -1
+    },
+    // Additional entity resolutions...
+  ]
+}
+</output>
+
+Notes:
+- If an entity is a duplicate of one of its duplication_candidates, set duplicate_idx to the idx of that candidate.
+- If an entity is not a duplicate of any candidate, set duplicate_idx to -1.
+- Always include all entities from the input in your response.
+- Always wrap the output in these tags <output> </output>
+    `,
+    },
+    {
+      role: "user",
+      content: `
+<PREVIOUS EPISODES>
+${JSON.stringify(previousEpisodes, null, 2)}
+</PREVIOUS EPISODES>
+
+<CURRENT EPISODE>
+${episodeContent}
+</CURRENT EPISODE>
+
+<ENTITIES>
+${JSON.stringify(context.extracted_nodes, null, 2)}
+</ENTITIES>
+`,
+    },
+  ];
+};
--- a/apps/webapp/app/services/prompts/statements.ts
+++ b/apps/webapp/app/services/prompts/statements.ts
@ -0,0 +1,243 @@
+import { type CoreMessage } from "ai";
+import { type Triple } from "../knowledgeGraph.server";
+
+/**
+ * Build the prompt that asks the LLM for subject-predicate-object statements
+ * about an episode, for use in a reified knowledge graph where each statement
+ * is itself a node.
+ *
+ * @param context - Prompt inputs; reads `episodeContent`, `previousEpisodes`
+ *   and `entities` (the latter two are serialized as pretty-printed JSON).
+ * @returns The system message (instructions and output format) followed by
+ *   the user message (episode content, previous episodes, available entities).
+ */
+export const extract_statements = (
+  context: Record<string, any>,
+): CoreMessage[] => {
+  // Fixed instructions: extraction rules, temporal guidance and the expected
+  // <output> JSON structure.
+  const systemPrompt = `You are a knowledge graph expert that extracts factual statements from text as subject-predicate-object triples.
+Your task is to identify important facts and represent them in a reified knowledge graph model
+where each statement is a first-class node connected to subject, predicate, and object entities.
+
+I need to extract factual statements from the following conversation/text and represent them in a reified knowledge graph.
+
+Follow these instructions carefully:
+
+1. Identify key factual statements from the episode content and previous episodes
+2. Represent each statement as a subject-predicate-object triple
+3. Only use entities from the AVAILABLE ENTITIES list as subjects and objects
+4. For each statement, provide:
+   - The subject entity name (must match exactly one from AVAILABLE ENTITIES)
+   - The predicate/relationship (a clear, concise verb or relationship type)
+   - The object entity name (must match exactly one from AVAILABLE ENTITIES)
+   - A natural language fact that accurately represents the triple
+   - Any additional attributes relevant to the relationship
+
+IMPORTANT ABOUT TEMPORAL INFORMATION:
+- The system tracks when facts become known (validAt) and contradicted (invalidAt) separately
+- You must include any temporal information WITHIN the fact statement itself
+- For example, if someone worked at a company from 2015-2020, include this in the "fact" field and "attributes.timespan" field
+- Do NOT omit temporal information from facts - it's critical context
+- Examples of good temporal facts:
+  * "John worked at Google from 2015 to 2020"
+  * "Sarah lived in New York until 2018"
+  * "The project was completed on March 15, 2023"
+
+Format your response as a JSON object with the following structure:
+<output>
+{
+  "edges": [
+    {
+      "source": "[Subject Entity Name]",
+      "relationship": "[Predicate/Relationship Type]",
+      "target": "[Object Entity Name]", 
+      "fact": "[Natural language representation of the fact INCLUDING any temporal information]",
+      "attributes": { 
+        "confidence": 0.9, // How confident you are in this fact (0-1)
+        "source": "explicit", // Whether the fact was explicitly stated or inferred
+        "timespan": { // Include if the fact has a specific time period
+          "start": "2015", // When the fact started being true (if known)
+          "end": "2020" // When the fact stopped being true (if known)
+        }
+      }
+    },
+    // Additional statements...
+  ]
+}
+</output>
+
+Important guidelines:
+- Only include the most significant and factual statements
+- Do not invent entities not present in the AVAILABLE ENTITIES list
+- Be precise in representing the relationships
+- Each fact should be atomic (representing a single piece of information)
+- ALWAYS include temporal information when available (dates, periods, etc.) in both the fact text AND attributes
+- Facts should be based on the episode content, not general knowledge
+- Aim for quality over quantity, prioritize clear, unambiguous statements
+- For ongoing facts (still true), omit the "end" field in timespan`;
+
+  // Per-call data, wrapped in the tagged sections the instructions refer to.
+  const userPrompt = `
+<EPISODE_CONTENT>
+${context.episodeContent}
+</EPISODE_CONTENT>
+
+<PREVIOUS_EPISODES>
+${JSON.stringify(context.previousEpisodes, null, 2)}
+</PREVIOUS_EPISODES>
+
+<AVAILABLE_ENTITIES>
+${JSON.stringify(context.entities, null, 2)}
+</AVAILABLE_ENTITIES>
+`;
+
+  return [
+    { role: "system", content: systemPrompt },
+    { role: "user", content: userPrompt },
+  ];
+};
+
+/**
+ * Build the prompt that asks the LLM whether a new statement contradicts any
+ * of a set of existing statements.
+ *
+ * @param context - Prompt inputs; reads `newStatement`, `existingStatements`
+ *   (serialized as pretty-printed JSON) and `referenceTime`.
+ * @returns The system message (role description) followed by the user message
+ *   (the statements, the reference time, and the expected JSON answer format).
+ */
+export const detect_contradictions = (
+  context: Record<string, any>,
+): CoreMessage[] => {
+  // Role description, assembled from its three sentence fragments.
+  const systemPrompt = [
+    "You are a knowledge graph reasoning expert that identifies contradictions between statements. ",
+    "Your task is to analyze pairs of statements and determine if they contradict each other ",
+    "based on their temporal validity and factual content.",
+  ].join("");
+
+  // Task data plus the definition of a contradiction and the response schema.
+  const userPrompt = `
+I need to detect contradictions between statements in a temporal knowledge graph.
+
+<NEW STATEMENT>
+${context.newStatement}
+</NEW STATEMENT>
+
+<EXISTING STATEMENTS>
+${JSON.stringify(context.existingStatements, null, 2)}
+</EXISTING STATEMENTS>
+
+<REFERENCE TIME>
+${context.referenceTime}
+</REFERENCE TIME>
+
+Determine if the NEW STATEMENT contradicts any of the EXISTING STATEMENTS.
+A contradiction occurs when:
+
+1. Two statements assert incompatible facts about the same subject-predicate pair
+2. The statements overlap in their temporal validity periods
+
+For example, if one statement says "John works at Company A from January 2023" and another says 
+"John works at Company B from March 2023", these would contradict if a person can only work at one 
+company at a time.
+
+Format your response as a JSON object with the following structure:
+{
+  "hasContradiction": true/false,
+  "contradictedStatements": [
+    {
+      "statementId": "[ID of the contradicted statement]",
+      "reason": "[Explanation of why these statements contradict]",
+      "temporalRelationship": "[overlapping/containing/contained/after/before]"
+    }
+  ]
+}
+
+Important guidelines:
+- Consider the temporal validity of statements
+- Only mark as contradictions if statements are truly incompatible
+- Provide clear reasoning for each identified contradiction
+- Consider the context and domain constraints
+- If no contradictions exist, return an empty contradictedStatements array
+`;
+
+  return [
+    { role: "system", content: systemPrompt },
+    { role: "user", content: userPrompt },
+  ];
+};
+
+/**
+ * Build the prompt that asks the LLM to classify new statements as duplicates
+ * of, or contradictions to, semantically similar existing statements (e.g.
+ * ones found through vector search).
+ *
+ * @param context - Prompt inputs; reads `newStatements` (an array of triples,
+ *   each with `statement.uuid`, `statement.fact` and named `subject`,
+ *   `predicate`, `object` nodes), `similarStatements` (serialized as JSON),
+ *   `episodeContent` and `referenceTime`. A missing `newStatements` is treated
+ *   as an empty list.
+ * @returns The system message (instructions and output format) followed by
+ *   the user message (the statements and episode data).
+ */
+export const resolve_statements = (
+  context: Record<string, any>,
+): CoreMessage[] => {
+  // Render one text record per new statement. Subject/predicate/object are
+  // entity objects, so interpolate their `name`; interpolating the object
+  // itself would print "[object Object]" into the prompt.
+  const newStatementsBlock = (context.newStatements ?? [])
+    .map(
+      (triple: Triple) => `
+  StatementId: ${triple.statement.uuid}
+  Fact: ${triple.statement.fact}
+  Subject: ${triple.subject.name}
+  Predicate: ${triple.predicate.name}
+  Object: ${triple.object.name}
+  ---------------------------
+  `,
+    )
+    .join("");
+
+  return [
+    {
+      role: "system",
+      content: `You are a knowledge graph expert that analyzes statements to detect duplications and contradictions. 
+You analyze multiple new statements against existing statements to determine whether the new statement duplicates any existing statement or contradicts any existing statement.
+Pay special attention to temporal aspects, event updates, and context changes. If an event changes (like a date shift), statements about the original event are likely contradicted by statements about the updated event.
+
+
+I need to analyze whether a new statement duplicates or contradicts existing statements in a knowledge graph.
+  
+  
+Follow these instructions carefully:
+ 
+1. Analyze if the new statement is a semantic duplicate of any existing statement
+   - Two statements are duplicates if they express the same meaning even with different wording
+   - Consider entity resolution has already been done, so different entity names are NOT an issue
+
+2. Determine if the new statement contradicts any existing valid statements
+   - Contradictions occur when statements cannot both be true at the same time
+   - Pay special attention to negations, opposites, and mutually exclusive facts
+   - Consider temporal validity - statements may only be contradictions within specific time periods
+   
+3. IMPORTANT: For events that change (like rescheduled appointments, moved dates, changed locations):
+   - When an event changes date/time/location, new statements about the updated event likely contradict statements about the original event
+   - Look for contextual clues about event changes, cancellations, or rescheduling
+   - Example: If "Concert on June 10" moved to "Concert on June 12", then "John attends June 10 concert" contradicts "John doesn't attend June 12 concert"
+
+  4. Format your response as a JSON object with the following structure:
+<output>
+[{
+    "statementId": "new_statement_uuid",
+    "isDuplicate": true/false,
+    "duplicateId": "existing_statement_uuid-if-duplicate-exists",
+    "contradictions": ["existing_statement_uuid-1", "existing_statement_uuid-2"], // UUIDs of any contradicted statements
+    }]
+</output>
+  
+  Important guidelines:
+- If the new statement is a duplicate, include the UUID of the duplicate statement
+- For contradictions, list all statement UUIDs that the new statement contradicts
+- If a statement is both a contradiction AND a duplicate (rare case), mark it as a duplicate
+- Identify temporal and contextual shifts that may create implicit contradictions
+- Don't give any reason, just give the final output.
+`,
+    },
+    {
+      role: "user",
+      content: `
+  <NEW_STATEMENTS>
+  ${newStatementsBlock}
+  </NEW_STATEMENTS>
+  
+  <SIMILAR_STATEMENTS>
+  ${JSON.stringify(context.similarStatements, null, 2)}
+  </SIMILAR_STATEMENTS>
+  
+  <EPISODE_CONTENT>
+  ${context.episodeContent}
+  </EPISODE_CONTENT>
+  
+  <REFERENCE_TIME>
+  ${context.referenceTime}
+  </REFERENCE_TIME>  `,
+    },
+  ];
+};