diff --git a/apps/webapp/app/services/knowledgeGraph.server.ts b/apps/webapp/app/services/knowledgeGraph.server.ts index b6b07ce..f020bff 100644 --- a/apps/webapp/app/services/knowledgeGraph.server.ts +++ b/apps/webapp/app/services/knowledgeGraph.server.ts @@ -10,7 +10,8 @@ import { import { LLMMappings, LLMModelEnum } from "@recall/types"; import { logger } from "./logger.service"; import crypto from "crypto"; -import { extract_message, extract_text } from "./prompts/extractNodes"; +import { dedupeNodes, extract_message, extract_text } from "./prompts/nodes"; +import { extract_statements } from "./prompts/statements"; export enum EpisodeType { Conversation = "CONVERSATION", @@ -41,7 +42,7 @@ export interface EpisodicNode { * Entities represent subjects, objects, or predicates in statements */ export interface EntityNode { - uuid?: string; + uuid: string; name: string; type: string; attributes: Record; @@ -211,8 +212,8 @@ export class KnowledgeGraphService { // Step 5: Statement Extraction - Extract statements (triples) instead of direct edges const extractedStatements = await this.extractStatements( - resolvedNodes, episode, + resolvedNodes, previousEpisodes, ); @@ -292,21 +293,27 @@ export class KnowledgeGraphService { }, ); - const extractedEntities = JSON.parse(responseText || "{}").entities || []; - // Convert to EntityNode objects const entities: EntityNode[] = []; - for (const entity of extractedEntities) { - entities.push({ - uuid: crypto.randomUUID(), - name: entity.name, - type: entity.type, - attributes: entity.attributes || {}, - nameEmbedding: [], // Will be populated later - createdAt: new Date(), - userId: episode.userId, - }); + const outputMatch = responseText.match(/([\s\S]*?)<\/output>/); + if (outputMatch && outputMatch[1]) { + responseText = outputMatch[1].trim(); + const extractedEntities = JSON.parse(responseText || "{}").entities || []; + + entities.push( + ...(await Promise.all( + extractedEntities.map(async (entity: any) => ({ + uuid: crypto.randomUUID(), + name: entity.name, + type: entity.type, + attributes: entity.attributes || {}, + nameEmbedding: await this.getEmbedding(entity.name), + createdAt: new Date(), + userId: episode.userId, + })), + )), + ); } return entities; @@ -321,55 +328,102 @@ export class KnowledgeGraphService { previousEpisodes: EpisodicNode[], ): Promise<{ resolvedNodes: EntityNode[]; uuidMap: Map }> { const uuidMap = new Map(); - const resolvedNodes: EntityNode[] = []; - for (const extractedNode of extractedNodes) { - // Generate embedding for the node name - const nameEmbedding = await this.getEmbedding(extractedNode.name); + const existingNodesLists = await Promise.all( + extractedNodes.map(async (extractedNode) => { + // Check if a similar node already exists in HelixDB + // Use vector similarity search to find similar entities + // Threshold is 0.85 - meaning at least 85% similarity (lower cosine distance) + const similarEntities = await helixClient.query("findSimilarEntities", { + queryEmbedding: extractedNode.nameEmbedding, + limit: 5, // Get top 5 matches + threshold: 0.85, // 85% similarity threshold + }); - // Check if a similar node already exists in HelixDB - // Use vector similarity search to find similar entities - // Threshold is 0.85 - meaning at least 85% similarity (lower cosine distance) - const similarEntities = await helixClient.query("findSimilarEntities", { - queryEmbedding: nameEmbedding, - limit: 5, // Get top 5 matches - threshold: 0.85, // 85% similarity threshold + return similarEntities.nodes; + }), + ); + + if (!existingNodesLists || existingNodesLists.length === 0) { + extractedNodes.forEach((node) => { + uuidMap.set(node.uuid, node.uuid); }); - - const existingNodes = similarEntities.nodes; - - // Get entity types dictionary or empty object if not provided - const entityTypesDict = entity_types || {}; - - if (similarEntities.length > 0) { - // If similar nodes exist, we need to decide if we want to merge with an existing one - // This could involve LLM to determine if they're the same entity - const existingNode = similarEntities[0]; - - // Map the extracted node UUID to the existing node UUID - uuidMap.set(extractedNode.uuid, existingNode.uuid); - - // Add the existing node to our resolved nodes if not already present - if (!resolvedNodes.some((node) => node.uuid === existingNode.uuid)) { - resolvedNodes.push({ - uuid: existingNode.uuid, - name: existingNode.name, - type: existingNode.type, - attributes: existingNode.attributes || {}, - nameEmbedding: existingNode.nameEmbedding, - createdAt: new Date(existingNode.createdAt), - userId: existingNode.userId, - }); - } - } else { - // This is a new entity, add embedding and keep as is - extractedNode.nameEmbedding = nameEmbedding; - resolvedNodes.push(extractedNode); - uuidMap.set(extractedNode.uuid, extractedNode.uuid); - } + return { resolvedNodes: extractedNodes, uuidMap }; } - return { resolvedNodes, uuidMap }; + // Prepare context for LLM + const extractedNodesContext = extractedNodes.map( + (node: EntityNode, i: number) => { + return { + id: i, + name: node.name, + entity_type: node.type, + entity_type_description: "Default Entity Type", + duplication_candidates: existingNodesLists[i].map( + (candidate: EntityNode, j: number) => ({ + idx: j, + name: candidate.name, + entity_types: candidate.type, + ...candidate.attributes, + }), + ), + }; + }, + ); + + const context = { + extracted_nodes: extractedNodesContext, + episode_content: episode ? episode.content : "", + previous_episodes: previousEpisodes + ? previousEpisodes.map((ep) => ep.content) + : [], + }; + + const messages = dedupeNodes(context); + + let responseText = ""; + + await this.makeModelCall( + false, + LLMModelEnum.GPT41, + messages as CoreMessage[], + (text) => { + responseText = text; + }, + ); + + const outputMatch = responseText.match(/([\s\S]*?)<\/output>/); + if (outputMatch && outputMatch[1]) { + responseText = outputMatch[1].trim(); + const parsedResponse = JSON.parse(responseText); + const nodeResolutions = parsedResponse.entity_resolutions || []; + + // Process each node resolution to either map to an existing node or keep as new + const resolvedNodes = nodeResolutions.map((resolution: any) => { + const resolutionId = resolution.id ?? -1; + const duplicateIdx = resolution.duplicate_idx ?? -1; + const extractedNode = extractedNodes[resolutionId]; + + // If a duplicate was found, use the existing node, otherwise use the extracted node + const resolvedNode = + duplicateIdx >= 0 && + duplicateIdx < existingNodesLists[resolutionId]?.length + ? existingNodesLists[resolutionId][duplicateIdx] + : extractedNode; + + // Update the name if provided in the resolution + if (resolution.name) { + resolvedNode.name = resolution.name; + } + + // Map the extracted UUID to the resolved UUID + uuidMap.set(extractedNode.uuid, resolvedNode.uuid); + + return resolvedNode; + }); + + return { resolvedNodes, uuidMap }; + } } /** @@ -388,18 +442,16 @@ export class KnowledgeGraphService { content: ep.content, createdAt: ep.createdAt.toISOString(), })), - nodes: resolvedNodes.map((node) => ({ + entities: resolvedNodes.map((node) => ({ name: node.name, type: node.type, uuid: node.uuid, })), referenceTime: episode.validAt.toISOString(), - relationshipTypes: {}, // Could be populated with relationship definitions }; // Get the statement extraction prompt from the prompt library - // Note: You might need to update your prompts to extract subject-predicate-object patterns - const messages = promptLibrary.extractEdges.edge.call(context); + const messages = extract_statements(context); let responseText = ""; @@ -413,65 +465,90 @@ export class KnowledgeGraphService { ); // Parse the statements from the LLM response - // This will need to be updated based on your prompt format const extractedTriples = JSON.parse(responseText || "{}").edges || []; - // Convert to Triple objects with Statement nodes - const triples: Triple[] = []; + // Convert extracted triples to Triple objects with Statement nodes + const triples = await Promise.all( + // Fix: Type 'any'. + extractedTriples.map(async (triple: any) => { + // Find the subject and object nodes + const subjectNode = resolvedNodes.find( + (node) => node.name.toLowerCase() === triple.source.toLowerCase(), + ); - for (const triple of extractedTriples) { - const subjectNode = resolvedNodes.find( - (node) => - node.name.toLowerCase() === triple.sourceEntityName.toLowerCase(), - ); + const objectNode = resolvedNodes.find( + (node) => node.name.toLowerCase() === triple.target.toLowerCase(), + ); - const objectNode = resolvedNodes.find( - (node) => - node.name.toLowerCase() === triple.targetEntityName.toLowerCase(), - ); - - // Find or create a predicate node for the relationship type - const predicateNode = resolvedNodes.find( - (node) => - node.name.toLowerCase() === triple.relationshipType.toLowerCase(), - ) || { - uuid: crypto.randomUUID(), - name: triple.relationshipType, - type: "Predicate", - attributes: {}, - nameEmbedding: [], // Will be populated later - createdAt: new Date(), - userId: episode.userId, - }; - - if (subjectNode && objectNode) { - // Generate embedding for the fact - const factEmbedding = await this.getEmbedding(triple.fact); - - // Create a statement node - const statement: StatementNode = { + // Find or create a predicate node for the relationship type + const predicateNode = resolvedNodes.find( + (node) => + node.name.toLowerCase() === triple.relationship.toLowerCase(), + ) || { uuid: crypto.randomUUID(), - fact: triple.fact, - groupId: crypto.randomUUID().slice(0, 8), // Could be used to group related statements + name: triple.relationship, + type: "Predicate", + attributes: {}, + nameEmbedding: await this.getEmbedding(triple.relationship), createdAt: new Date(), - validAt: triple.validAt ? new Date(triple.validAt) : episode.validAt, - invalidAt: triple.invalidAt ? new Date(triple.invalidAt) : undefined, - attributesJson: JSON.stringify({}), // Could store additional metadata - embedding: factEmbedding, userId: episode.userId, }; - triples.push({ - statement, - subject: subjectNode, - predicate: predicateNode, - object: objectNode, - provenance: episode, - }); - } + if (subjectNode && objectNode) { + // Create a statement node + const statement: StatementNode = { + uuid: crypto.randomUUID(), + fact: triple.fact, + factEmbedding: await this.getEmbedding(triple.fact), + createdAt: new Date(), + validAt: episode.validAt, + invalidAt: null, + attributes: triple.attributes || {}, + userId: episode.userId, + }; + + return { + statement, + subject: subjectNode, + predicate: predicateNode, + object: objectNode, + provenance: episode, + }; + } + return null; + }), + ); + + // Filter out null values (where subject or object wasn't found) + return triples.filter(Boolean) as Triple[]; + } + + private async resolvePredicateNodes( + triples: Triple[], + episode: EpisodicNode, + ) { + const predicateNodes: EntityNode[] = triples.map((triple: Triple) => { + return triple.predicate; + }); + + if (predicateNodes.length === 0) { + return; } - return triples; + const existingNodesLists = await Promise.all( + predicateNodes.map(async (predicateNode) => { + // Check if a similar node already exists in HelixDB + // Use vector similarity search to find similar entities + // Threshold is 0.85 - meaning at least 85% similarity (lower cosine distance) + const similarEntities = await helixClient.query("findSimilarEntities", { + queryEmbedding: predicateNode.nameEmbedding, + limit: 5, // Get top 5 matches + threshold: 0.85, // 85% similarity threshold + }); + + return similarEntities.nodes; + }), + ); } /** diff --git a/apps/webapp/app/services/prompts/extractStatements.ts b/apps/webapp/app/services/prompts/extractStatements.ts deleted file mode 100644 index e69de29..0000000 diff --git a/apps/webapp/app/services/prompts/index.ts b/apps/webapp/app/services/prompts/index.ts index d81cc73..9ea72aa 100644 --- a/apps/webapp/app/services/prompts/index.ts +++ b/apps/webapp/app/services/prompts/index.ts @@ -3,6 +3,6 @@ */ // Export types from individual prompt modules -export { type ExtractedEntity, type ExtractedEntities } from "./extractNodes"; +export { type ExtractedEntity, type ExtractedEntities } from "./nodes"; export { type Edge, type ExtractedEdges } from "./extractEdges"; export { type ContradictionResult } from "./contradiction"; diff --git a/apps/webapp/app/services/prompts/extractNodes.ts b/apps/webapp/app/services/prompts/nodes.ts similarity index 55% rename from apps/webapp/app/services/prompts/extractNodes.ts rename to apps/webapp/app/services/prompts/nodes.ts index 1b06510..be7be15 100644 --- a/apps/webapp/app/services/prompts/extractNodes.ts +++ b/apps/webapp/app/services/prompts/nodes.ts @@ -34,21 +34,13 @@ export interface EntityClassification { export const extract_message = ( context: Record, ): CoreMessage[] => { - const sysPrompt = `You are an AI assistant that extracts entity nodes from conversational messages. -Your primary task is to extract and classify significant entities mentioned in the conversation.`; + const sysPrompt = `You are an AI assistant that extracts entity nodes from conversational messages for a reified knowledge graph. +Your primary task is to extract and classify significant entities mentioned in the conversation. - const userPrompt = ` - -${JSON.stringify(context.previousEpisodes || [], null, 2)} - - - -${context.episodeContent} - - - -${JSON.stringify(context.entityTypes || {}, null, 2)} - +In a reified knowledge graph, we need to identify subject and object entities that will be connected through statements. +Focus on extracting: +1. Subject entities (people, objects, concepts) +2. Object entities (people, objects, concepts) Instructions: @@ -63,14 +55,34 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr - Assign the appropriate type for each one. 3. **Exclusions**: - - Do NOT extract entities representing relationships or actions. + - Do NOT extract entities representing relationships or actions (predicates will be handled separately). - Do NOT extract dates, times, or other temporal information—these will be handled separately. 4. **Formatting**: - Be **explicit and unambiguous** in naming entities (e.g., use full names when available). -${context.customPrompt || ""} -`; + +Format your response as a JSON object with the following structure: + +{ + "entities": [ + { + "name": "Entity Name", + "type": "Entity Type", + } + // Additional entities... + ] +} +`; + + const userPrompt = ` + +${JSON.stringify(context.previousEpisodes || [], null, 2)} + + + +${context.episodeContent} +`; return [ { role: "system", content: sysPrompt }, @@ -82,29 +94,50 @@ ${context.customPrompt || ""} * Extract entities from text-based content */ export const extract_text = (context: Record): CoreMessage[] => { - const sysPrompt = `You are an AI assistant that extracts entity nodes from text. -Your primary task is to extract and classify the speaker and other significant entities mentioned in the provided text.`; + const sysPrompt = ` + You are an AI assistant that extracts entity nodes from text for a reified knowledge graph. +Your primary task is to extract and classify significant entities mentioned in the provided text. +In a reified knowledge graph, we need to identify subject and object entities that will be connected through statements. +Focus on extracting: +1. Subject entities (people, objects, concepts) +2. Object entities (people, objects, concepts) + +Instructions: + +You are given a TEXT. Your task is to extract **entity nodes** mentioned **explicitly or implicitly** in the TEXT. + +1. **Entity Identification**: + - Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the TEXT. + +2. **Entity Classification**: + - Use the descriptions in ENTITY TYPES to classify each extracted entity. + - Assign the appropriate type for each one. + +3. **Exclusions**: + - Do NOT extract entities representing relationships or actions (predicates will be handled separately). + - Do NOT extract dates, times, or other temporal information—these will be handled separately. + +4. **Formatting**: + - Be **explicit and unambiguous** in naming entities (e.g., use full names when available). + + +Format your response as a JSON object with the following structure: + +{ + "entities": [ + { + "name": "Entity Name", + "type": "Entity Type" + } + // Additional entities... + ] +} +`; const userPrompt = ` ${context.episodeContent} - - -${JSON.stringify(context.entityTypes || {}, null, 2)} - - -Given the above text, extract entities from the TEXT that are explicitly or implicitly mentioned. -For each entity extracted, also determine its entity type based on the provided ENTITY TYPES and their descriptions. -Indicate the classified entity type by providing its entity_type_id. - -${context.customPrompt || ""} - -Guidelines: -1. Extract significant entities, concepts, or actors mentioned in the conversation. -2. Avoid creating nodes for relationships or actions. -3. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later). -4. Be as explicit as possible in your node names, using full names and avoiding abbreviations. `; return [ @@ -218,3 +251,82 @@ ${JSON.stringify(context.node, null, 2)} }, ]; }; + +/** + * Resolve entity duplications + */ +export const dedupeNodes = (context: Record): CoreMessage[] => { + return [ + { + role: "system", + content: `You are a helpful assistant who determines whether or not ENTITIES extracted from a conversation are duplicates of existing entities. + +Each entity in ENTITIES is represented as a JSON object with the following structure: +{ + id: integer id of the entity, + name: "name of the entity", + entity_type: "ontological classification of the entity", + entity_type_description: "Description of what the entity type represents", + duplication_candidates: [ + { + idx: integer index of the candidate entity, + name: "name of the candidate entity", + entity_type: "ontological classification of the candidate entity", + ... + } + ] +} + +For each of the above ENTITIES, determine if the entity is a duplicate of any of its duplication candidates. +Entities should only be considered duplicates if they refer to the *same real-world object or concept*. +Do NOT mark entities as duplicates if: +- They are related but distinct. +- They have similar names or purposes but refer to separate instances or concepts. + +Task: +Your response must be a JSON object with an "entity_resolutions" array containing one entry for each entity. + +For each entity, include: +- "id": the id of the entity (integer) +- "name": the name of the entity (string) +- "duplicate_idx": the index of the duplicate candidate, or -1 if no duplicate (integer) + +Format your response as follows: + +{ + "entity_resolutions": [ + { + "id": 0, + "name": "Entity Name", + "duplicate_idx": -1 + }, + // Additional entity resolutions... + ] +} + + +Notes: +- If an entity is a duplicate of one of its duplication_candidates, set duplicate_idx to the idx of that candidate. +- If an entity is not a duplicate of any candidate, set duplicate_idx to -1. +- Always include all entities from the input in your response. +- Always wrap the output in these tags + `, + }, + { + role: "user", + content: ` + +${JSON.stringify(context.previousEpisodes || [], null, 2)} + + + +${context.episodeContent} + + + +${JSON.stringify(context.extracted_nodes, null, 2)} + +`, + }, + ]; +}; diff --git a/apps/webapp/app/services/prompts/statements.ts b/apps/webapp/app/services/prompts/statements.ts new file mode 100644 index 0000000..ff4de3f --- /dev/null +++ b/apps/webapp/app/services/prompts/statements.ts @@ -0,0 +1,243 @@ +import { type CoreMessage } from "ai"; +import { type Triple } from "../knowledgeGraph.server"; + +/** + * Extract statements (triples) from episode content in a reified knowledge graph model + * This function generates a prompt for LLM to extract subject-predicate-object statements + * and represent them as first-class nodes with proper connections + */ +export const extract_statements = ( + context: Record, +): CoreMessage[] => { + return [ + { + role: "system", + content: `You are a knowledge graph expert that extracts factual statements from text as subject-predicate-object triples. +Your task is to identify important facts and represent them in a reified knowledge graph model +where each statement is a first-class node connected to subject, predicate, and object entities. + +I need to extract factual statements from the following conversation/text and represent them in a reified knowledge graph. + +Follow these instructions carefully: + +1. Identify key factual statements from the episode content and previous episodes +2. Represent each statement as a subject-predicate-object triple +3. Only use entities from the AVAILABLE ENTITIES list as subjects and objects +4. For each statement, provide: + - The subject entity name (must match exactly one from AVAILABLE ENTITIES) + - The predicate/relationship (a clear, concise verb or relationship type) + - The object entity name (must match exactly one from AVAILABLE ENTITIES) + - A natural language fact that accurately represents the triple + - Any additional attributes relevant to the relationship + +IMPORTANT ABOUT TEMPORAL INFORMATION: +- The system tracks when facts become known (validAt) and contradicted (invalidAt) separately +- You must include any temporal information WITHIN the fact statement itself +- For example, if someone worked at a company from 2015-2020, include this in the "fact" field and "attributes.timespan" field +- Do NOT omit temporal information from facts - it's critical context +- Examples of good temporal facts: + * "John worked at Google from 2015 to 2020" + * "Sarah lived in New York until 2018" + * "The project was completed on March 15, 2023" + +Format your response as a JSON object with the following structure: + +{ + "edges": [ + { + "source": "[Subject Entity Name]", + "relationship": "[Predicate/Relationship Type]", + "target": "[Object Entity Name]", + "fact": "[Natural language representation of the fact INCLUDING any temporal information]", + "attributes": { + "confidence": 0.9, // How confident you are in this fact (0-1) + "source": "explicit", // Whether the fact was explicitly stated or inferred + "timespan": { // Include if the fact has a specific time period + "start": "2015", // When the fact started being true (if known) + "end": "2020" // When the fact stopped being true (if known) + } + } + }, + // Additional statements... + ] +} + + +Important guidelines: +- Only include the most significant and factual statements +- Do not invent entities not present in the AVAILABLE ENTITIES list +- Be precise in representing the relationships +- Each fact should be atomic (representing a single piece of information) +- ALWAYS include temporal information when available (dates, periods, etc.) in both the fact text AND attributes +- Facts should be based on the episode content, not general knowledge +- Aim for quality over quantity, prioritize clear, unambiguous statements +- For ongoing facts (still true), omit the "end" field in timespan`, + }, + { + role: "user", + content: ` + +${context.episodeContent} + + + +${JSON.stringify(context.previousEpisodes, null, 2)} + + + +${JSON.stringify(context.entities, null, 2)} + +`, + }, + ]; +}; + +/** + * Detect contradictions between statements in the knowledge graph + */ +export const detect_contradictions = ( + context: Record, +): CoreMessage[] => { + return [ + { + role: "system", + content: + "You are a knowledge graph reasoning expert that identifies contradictions between statements. " + + "Your task is to analyze pairs of statements and determine if they contradict each other " + + "based on their temporal validity and factual content.", + }, + { + role: "user", + content: ` +I need to detect contradictions between statements in a temporal knowledge graph. + + +${context.newStatement} + + + +${JSON.stringify(context.existingStatements, null, 2)} + + + +${context.referenceTime} + + +Determine if the NEW STATEMENT contradicts any of the EXISTING STATEMENTS. +A contradiction occurs when: + +1. Two statements assert incompatible facts about the same subject-predicate pair +2. The statements overlap in their temporal validity periods + +For example, if one statement says "John works at Company A from January 2023" and another says +"John works at Company B from March 2023", these would contradict if a person can only work at one +company at a time. + +Format your response as a JSON object with the following structure: +{ + "hasContradiction": true/false, + "contradictedStatements": [ + { + "statementId": "[ID of the contradicted statement]", + "reason": "[Explanation of why these statements contradict]", + "temporalRelationship": "[overlapping/containing/contained/after/before]" + } + ] +} + +Important guidelines: +- Consider the temporal validity of statements +- Only mark as contradictions if statements are truly incompatible +- Provide clear reasoning for each identified contradiction +- Consider the context and domain constraints +- If no contradictions exist, return an empty contradictedStatements array +`, + }, + ]; +}; + +/** + * Analyze similar statements to determine duplications and contradictions + * This prompt helps the LLM evaluate semantically similar statements found through vector search + * to determine if they are duplicates or contradictions + */ +export const resolve_statements = ( + context: Record, +): CoreMessage[] => { + return [ + { + role: "system", + content: `You are a knowledge graph expert that analyzes statements to detect duplications and contradictions. +You analyze multiple new statements against existing statements to determine whether the new statement duplicates any existing statement or contradicts any existing statement. +Pay special attention to temporal aspects, event updates, and context changes. If an event changes (like a date shift), statements about the original event are likely contradicted by statements about the updated event. + + +I need to analyze whether a new statement duplicates or contradicts existing statements in a knowledge graph. + + +Follow these instructions carefully: + +1. Analyze if the new statement is a semantic duplicate of any existing statement + - Two statements are duplicates if they express the same meaning even with different wording + - Consider entity resolution has already been done, so different entity names are NOT an issue + +2. Determine if the new statement contradicts any existing valid statements + - Contradictions occur when statements cannot both be true at the same time + - Pay special attention to negations, opposites, and mutually exclusive facts + - Consider temporal validity - statements may only be contradictions within specific time periods + +3. IMPORTANT: For events that change (like rescheduled appointments, moved dates, changed locations): + - When an event changes date/time/location, new statements about the updated event likely contradict statements about the original event + - Look for contextual clues about event changes, cancellations, or rescheduling + - Example: If "Concert on June 10" moved to "Concert on June 12", then "John attends June 10 concert" contradicts "John doesn't attend June 12 concert" + + 4. Format your response as a JSON object with the following structure: + +[{ + "statementId": "new_statement_uuid", + "isDuplicate": true/false, + "duplicateId": "existing_statement_uuid-if-duplicate-exists", + "contradictions": ["existing_statement_uuid-1", "existing_statement_uuid-2"], // UUIDs of any contradicted statements + }] + + + Important guidelines: +- If the new statement is a duplicate, include the UUID of the duplicate statement +- For contradictions, list all statement UUIDs that the new statement contradicts +- If a statement is both a contradiction AND a duplicate (rare case), mark it as a duplicate +- Identify temporal and contextual shifts that may create implicit contradictions +- Don't give any reason, just give the final output. +`, + }, + { + role: "user", + content: ` + + ${context.newStatements + .map( + (triple: Triple) => ` + StatementId: ${triple.statement.uuid} + Fact: ${triple.statement.fact} + Subject: ${triple.subject} + Predicate: ${triple.predicate} + Object: ${triple.object} + --------------------------- + `, + ) + .join("")} + + + + ${JSON.stringify(context.similarStatements, null, 2)} + + + + ${context.episodeContent} + + + + ${context.referenceTime} + `, + }, + ]; +};