diff --git a/apps/webapp/app/routes/workspaces.$workspaceSlug.ingest.tsx b/apps/webapp/app/routes/workspaces.$workspaceSlug.ingest.tsx
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/apps/webapp/app/routes/workspaces.$workspaceSlug.ingest.tsx
@@ -0,0 +1 @@
+
diff --git a/apps/webapp/app/services/knowledgeGraph.server.ts b/apps/webapp/app/services/knowledgeGraph.server.ts
new file mode 100644
index 0000000..b6b07ce
--- /dev/null
+++ b/apps/webapp/app/services/knowledgeGraph.server.ts
@@ -0,0 +1,802 @@
+import HelixDB from "helix-ts";
+import { openai } from "@ai-sdk/openai";
+import {
+  type CoreMessage,
+  embed,
+  generateText,
+  type LanguageModelV1,
+  streamText,
+} from "ai";
+import { LLMMappings, LLMModelEnum } from "@recall/types";
+import { logger } from "./logger.service";
+import crypto from "crypto";
+import { extract_message, extract_text } from "./prompts/extractNodes";
+import { edge as extractEdgesPrompt } from "./prompts/extractEdges";
+import { detect as detectContradictionPrompt } from "./prompts/contradiction";
+
+export enum EpisodeType {
+  Conversation = "CONVERSATION",
+  Text = "TEXT",
+}
+
+/**
+ * Interface for an episodic node in the reified knowledge graph.
+ * Episodes are containers for statements and represent source information.
+ */
+export interface EpisodicNode {
+  uuid?: string;
+  name: string;
+  content: string;
+  contentEmbedding?: number[];
+  type: string;
+  source: string;
+  sourceDescription?: string;
+  createdAt: Date;
+  validAt: Date;
+  labels: string[];
+  entityEdges?: string[];
+  userId: string;
+  space?: string;
+  sessionId?: string;
+}
+
+/**
+ * Interface for an entity node in the reified knowledge graph.
+ * Entities represent subjects, objects, or predicates in statements.
+ */
+export interface EntityNode {
+  uuid?: string;
+  name: string;
+  type: string;
+  attributes: Record<string, any>;
+  nameEmbedding: number[];
+  createdAt: Date;
+  userId: string;
+  space?: string;
+}
+
+/**
+ * Interface for a statement node in the reified knowledge graph.
+ * Statements are first-class objects representing facts with temporal properties.
+ */
+export interface StatementNode {
+  uuid?: string;
+  fact: string;
+  groupId: string;
+  embedding: number[];
+  createdAt: Date;
+  validAt: Date;
+  invalidAt: Date | null;
+  attributesJson: string;
+  userId: string;
+  space?: string;
+}
+
+/**
+ * Interface for a triple in the reified knowledge graph.
+ * A triple connects a subject, predicate, and object via a statement node
+ * and maintains provenance information.
+ */
+export interface Triple {
+  statement: StatementNode;
+  subject: EntityNode;
+  predicate: EntityNode;
+  object: EntityNode;
+  provenance: EpisodicNode;
+}
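+
+// Illustrative example (names and values invented): the fact "Alice works at
+// Acme" is stored as one Statement node plus three role links and a provenance
+// link, rather than as a single entity-to-entity edge:
+//
+//   statement:  { fact: "Alice works at Acme", validAt: ..., invalidAt: null, ... }
+//   subject:    { name: "Alice", type: "Person", ... }
+//   predicate:  { name: "WORKS_AT", type: "Predicate", ... }
+//   object:     { name: "Acme", type: "Organization", ... }
+//   provenance: the episode the fact was extracted from
+//
+// A later contradiction does not delete this statement; it only sets invalidAt.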
+
+export type AddEpisodeParams = {
+  name: string;
+  episodeBody: string;
+  referenceTime: Date;
+  type: EpisodeType;
+  source: string;
+  userId: string;
+  spaceId?: string;
+  sessionId?: string;
+};
+
+export type AddEpisodeResult = {
+  episodeUuid: string;
+  nodesCreated: number;
+  statementsCreated: number;
+  processingTimeMs: number;
+};
+
+// Initialize the Helix client
+const helixClient = new HelixDB();
+
+// Default number of previous episodes to retrieve for context
+const DEFAULT_EPISODE_WINDOW = 5;
+const RELEVANT_SCHEMA_LIMIT = 10;
+
+export class KnowledgeGraphService {
+  async getEmbedding(text: string) {
+    const { embedding } = await embed({
+      model: openai.embedding("text-embedding-3-small"),
+      value: text,
+    });
+
+    return embedding;
+  }
+
+  async retrieveEpisodes(
+    referenceTime: Date,
+    episodeWindow: number = DEFAULT_EPISODE_WINDOW,
+    userId?: string,
+    type?: EpisodeType,
+  ): Promise<EpisodicNode[]> {
+    try {
+      // Use the proper HelixDB query for retrieving episodes
+      const episodes = await helixClient.query("getRecentEpisodes", {
+        referenceTime: referenceTime.toISOString(),
+        limit: episodeWindow,
+        userId: userId || null,
+        source: type || null,
+      });
+
+      if (!episodes || !Array.isArray(episodes)) {
+        logger.warn(
+          "Unexpected response from HelixDB for getRecentEpisodes:",
+          episodes,
+        );
+        return [];
+      }
+
+      // Map to the EpisodicNode interface
+      return episodes
+        .map((ep) => ({
+          uuid: ep.uuid,
+          name: ep.name,
+          content: ep.content,
+          sourceDescription: ep.sourceDescription,
+          source: ep.source as EpisodeType,
+          createdAt: new Date(ep.createdAt),
+          validAt: new Date(ep.validAt),
+          entityEdges: ep.entityEdges || [],
+          userId: ep.userId,
+          type: ep.type,
+          labels: ep.labels || [],
+          space: ep.space,
+          sessionId: ep.sessionId,
+        }))
+        .reverse();
+    } catch (error) {
+      logger.error("Error retrieving episode context:", { error });
+      return [];
+    }
+  }
+
+  /**
+   * Process an episode and update the knowledge graph.
+   *
+   * This method extracts information from the episode, creates nodes and statements,
+   * and updates the HelixDB database according to the reified + temporal approach.
+   */
+  async addEpisode(params: AddEpisodeParams): Promise<AddEpisodeResult> {
+    const startTime = Date.now();
+    const now = new Date();
+
+    try {
+      // Step 1: Context Retrieval - Get previous episodes for context
+      const previousEpisodes = await this.retrieveEpisodes(
+        params.referenceTime,
+        RELEVANT_SCHEMA_LIMIT,
+        params.userId,
+        params.type,
+      );
+
+      // Step 2: Episode Creation - Create or retrieve the episode
+      const episode: EpisodicNode = {
+        uuid: crypto.randomUUID(),
+        name: params.name,
+        content: params.episodeBody,
+        source: params.source || EpisodeType.Text,
+        type: params.type,
+        createdAt: now,
+        validAt: params.referenceTime,
+        labels: [],
+        userId: params.userId,
+        space: params.spaceId,
+        sessionId: params.sessionId,
+      };
+
+      // Step 3: Entity Extraction - Extract entities from the episode content
+      const extractedNodes = await this.extractEntities(
+        episode,
+        previousEpisodes,
+      );
+
+      // Step 4: Entity Resolution - Resolve extracted nodes to existing nodes or create new ones
+      const { resolvedNodes, uuidMap } = await this.resolveExtractedNodes(
+        extractedNodes,
+        episode,
+        previousEpisodes,
+      );
+
+      // Step 5: Statement Extraction - Extract statements (triples) instead of direct edges
+      const extractedStatements = await this.extractStatements(
+        episode,
+        resolvedNodes,
+        previousEpisodes,
+      );
+
+      // Step 6: Statement Resolution - Resolve statements and detect contradictions
+      const { resolvedStatements, invalidatedStatements } =
+        await this.resolveStatements(
+          extractedStatements,
+          episode,
+          resolvedNodes,
+        );
+
+      // Step 7: Role Assignment & Attribute Extraction - Extract additional attributes for nodes
+      const hydratedNodes = await this.extractAttributesFromNodes(
+        resolvedNodes,
+        episode,
+        previousEpisodes,
+      );
+
+      // Step 8: Generate embeddings for semantic search.
+      // Note: in this implementation, embeddings are generated during extraction,
+      // but this could be moved to a separate step for clarity.
+
+      // Step 9: Save everything to HelixDB using the reified + temporal structure
+      await this.saveToHelixDB(
+        episode,
+        hydratedNodes,
+        resolvedStatements,
+        invalidatedStatements,
+      );
+
+      const endTime = Date.now();
+      const processingTimeMs = endTime - startTime;
+
+      return {
+        episodeUuid: episode.uuid!,
+        nodesCreated: hydratedNodes.length,
+        statementsCreated: resolvedStatements.length,
+        processingTimeMs,
+      };
+    } catch (error) {
+      logger.error("Error in addEpisode:", { error });
+      throw error;
+    }
+  }
+
+  /**
+   * Extract entities from an episode using the LLM
+   */
+  private async extractEntities(
+    episode: EpisodicNode,
+    previousEpisodes: EpisodicNode[],
+  ): Promise<EntityNode[]> {
+    // Build the shared context for the entity-extraction prompts
+    const context = {
+      episodeContent: episode.content,
+      previousEpisodes: previousEpisodes.map((ep) => ({
+        content: ep.content,
+        createdAt: ep.createdAt.toISOString(),
+      })),
+      entityTypes: {}, // Could be populated with entity type definitions
+    };
+
+    // Pick the extraction prompt that matches the episode type
+    const messages =
+      episode.type === EpisodeType.Conversation
+        ? extract_message(context)
+        : extract_text(context);
+
+    let responseText = "";
+
+    await this.makeModelCall(
+      false,
+      LLMModelEnum.GPT41,
+      messages as CoreMessage[],
+      (text) => {
+        responseText = text;
+      },
+    );
+
+    const extractedEntities = JSON.parse(responseText || "{}").entities || [];
+
+    // Convert to EntityNode objects
+    const entities: EntityNode[] = [];
+
+    for (const entity of extractedEntities) {
+      entities.push({
+        uuid: crypto.randomUUID(),
+        name: entity.name,
+        type: entity.type,
+        attributes: entity.attributes || {},
+        nameEmbedding: [], // Will be populated later
+        createdAt: new Date(),
+        userId: episode.userId,
+      });
+    }
+
+    return entities;
+  }
+
+  /**
+   * Resolve extracted nodes to existing nodes or create new ones
+   */
+  private async resolveExtractedNodes(
+    extractedNodes: EntityNode[],
+    episode: EpisodicNode,
+    previousEpisodes: EpisodicNode[],
+  ): Promise<{ resolvedNodes: EntityNode[]; uuidMap: Map<string, string> }> {
+    const uuidMap = new Map<string, string>();
+    const resolvedNodes: EntityNode[] = [];
+
+    for (const extractedNode of extractedNodes) {
+      // Generate an embedding for the node name
+      const nameEmbedding = await this.getEmbedding(extractedNode.name);
+
+      // Check whether a similar node already exists in HelixDB.
+      // Use vector similarity search to find similar entities.
+      // The threshold of 0.85 means at least 85% similarity (lower cosine distance).
+      const similarEntities = await helixClient.query("findSimilarEntities", {
+        queryEmbedding: nameEmbedding,
+        limit: 5, // Get the top 5 matches
+        threshold: 0.85, // 85% similarity threshold
+      });
+
+      if (Array.isArray(similarEntities) && similarEntities.length > 0) {
+        // If similar nodes exist, we need to decide whether to merge with an
+        // existing one. This could involve an LLM call to determine whether
+        // they are the same entity.
+        const existingNode = similarEntities[0];
+
+        // Map the extracted node UUID to the existing node UUID
+        uuidMap.set(extractedNode.uuid!, existingNode.uuid);
+
+        // Add the existing node to our resolved nodes if not already present
+        if (!resolvedNodes.some((node) => node.uuid === existingNode.uuid)) {
+          resolvedNodes.push({
+            uuid: existingNode.uuid,
+            name: existingNode.name,
+            type: existingNode.type,
+            attributes: existingNode.attributes || {},
+            nameEmbedding: existingNode.nameEmbedding,
+            createdAt: new Date(existingNode.createdAt),
+            userId: existingNode.userId,
+          });
+        }
+      } else {
+        // This is a new entity; attach the embedding and keep it as is
+        extractedNode.nameEmbedding = nameEmbedding;
+        resolvedNodes.push(extractedNode);
+        uuidMap.set(extractedNode.uuid!, extractedNode.uuid!);
+      }
+    }
+
+    return { resolvedNodes, uuidMap };
+  }
+
+  /**
+   * Extract statements as first-class objects from an episode using the LLM.
+   * This replaces the previous extractEdges method with a reified approach.
+   */
+  private async extractStatements(
+    episode: EpisodicNode,
+    resolvedNodes: EntityNode[],
+    previousEpisodes: EpisodicNode[],
+  ): Promise<Triple[]> {
+    // Build the context for the relationship-extraction prompt
+    const context = {
+      episodeContent: episode.content,
+      previousEpisodes: previousEpisodes.map((ep) => ({
+        content: ep.content,
+        createdAt: ep.createdAt.toISOString(),
+      })),
+      nodes: resolvedNodes.map((node) => ({
+        name: node.name,
+        type: node.type,
+        uuid: node.uuid,
+      })),
+      referenceTime: episode.validAt.toISOString(),
+      relationshipTypes: {}, // Could be populated with relationship definitions
+    };
+
+    // Build the statement-extraction prompt.
+    // Note: the prompts may need updating to extract subject-predicate-object patterns.
+    const messages = extractEdgesPrompt(context);
+
+    let responseText = "";
+
+    await this.makeModelCall(
+      false,
+      LLMModelEnum.GPT41,
+      messages as CoreMessage[],
+      (text) => {
+        responseText = text;
+      },
+    );
+
+    // Parse the statements from the LLM response.
+    // This may need to be updated based on the prompt format.
+    const extractedTriples = JSON.parse(responseText || "{}").edges || [];
+
+    // Convert to Triple objects with Statement nodes
+    const triples: Triple[] = [];
+
+    for (const triple of extractedTriples) {
+      const subjectNode = resolvedNodes.find(
+        (node) =>
+          node.name.toLowerCase() === triple.sourceEntityName.toLowerCase(),
+      );
+
+      const objectNode = resolvedNodes.find(
+        (node) =>
+          node.name.toLowerCase() === triple.targetEntityName.toLowerCase(),
+      );
+
+      // Find or create a predicate node for the relationship type
+      const predicateNode = resolvedNodes.find(
+        (node) =>
+          node.name.toLowerCase() === triple.relationshipType.toLowerCase(),
+      ) || {
+        uuid: crypto.randomUUID(),
+        name: triple.relationshipType,
+        type: "Predicate",
+        attributes: {},
+        nameEmbedding: [], // Will be populated later
+        createdAt: new Date(),
+        userId: episode.userId,
+      };
+
+      if (subjectNode && objectNode) {
+        // Generate an embedding for the fact
+        const factEmbedding = await this.getEmbedding(triple.fact);
+
+        // Create a statement node
+        const statement: StatementNode = {
+          uuid: crypto.randomUUID(),
+          fact: triple.fact,
+          groupId: crypto.randomUUID().slice(0, 8), // Could be used to group related statements
+          createdAt: new Date(),
+          validAt: triple.validAt ? new Date(triple.validAt) : episode.validAt,
+          invalidAt: triple.invalidAt ? new Date(triple.invalidAt) : null,
+          attributesJson: JSON.stringify({}), // Could store additional metadata
+          embedding: factEmbedding,
+          userId: episode.userId,
+        };
+
+        triples.push({
+          statement,
+          subject: subjectNode,
+          predicate: predicateNode,
+          object: objectNode,
+          provenance: episode,
+        });
+      }
+    }
+
+    return triples;
+  }
+
+  /**
+   * Resolve statements by checking for existing statements and handling contradictions.
+   * This replaces the previous resolveExtractedEdges method with a reified approach.
+   */
+  private async resolveStatements(
+    triples: Triple[],
+    episode: EpisodicNode,
+    nodes: EntityNode[],
+  ): Promise<{
+    resolvedStatements: Triple[];
+    invalidatedStatements: Triple[];
+  }> {
+    const resolvedStatements: Triple[] = [];
+    const invalidatedStatements: Triple[] = [];
+
+    for (const triple of triples) {
+      // Find similar existing statements in HelixDB using the findContradictoryStatements query
+      const existingStatements = await helixClient.query(
+        "findContradictoryStatements",
+        {
+          subjectId: triple.subject.uuid,
+          predicateId: triple.predicate.uuid,
+        },
+      );
+
+      if (existingStatements && existingStatements.length > 0) {
+        // If we have statements with the same subject and predicate,
+        // check whether they have different objects (a contradiction)
+
+        // Get the full triple information for the existing statement
+        const existingTripleData = await helixClient.query(
+          "getTripleForStatement",
+          {
+            statementId: existingStatements[0].uuid,
+          },
+        );
+
+        if (
+          existingTripleData &&
+          existingTripleData.object.uuid !== triple.object.uuid
+        ) {
+          // This is potentially a contradiction: the objects differ for the same subject + predicate
+
+          // Use the LLM to determine whether this is truly a contradiction
+          const isContradiction = await this.detectContradiction(
+            triple.statement.fact,
+            existingTripleData.statement.fact,
+          );
+
+          if (isContradiction) {
+            // Create a copy of the existing statement, marked as invalidated
+            const invalidatedStatement: Triple = {
+              statement: {
+                ...existingTripleData.statement,
+                invalidAt: episode.validAt, // Mark as invalid at this episode's time
+              },
+              subject: existingTripleData.subject,
+              predicate: existingTripleData.predicate,
+              object: existingTripleData.object,
+              provenance: existingTripleData.provenance,
+            };
+
+            invalidatedStatements.push(invalidatedStatement);
+
+            // Add the new statement as a replacement
+            resolvedStatements.push(triple);
+          } else {
+            // Not a contradiction; just add the new statement
+            resolvedStatements.push(triple);
+          }
+        } else {
+          // The same triple already exists, so there is no need to create a duplicate.
+          // We could merge additional metadata or update provenance information here.
+          resolvedStatements.push(triple);
+        }
+      } else {
+        // This is a new statement; add it as is
+        resolvedStatements.push(triple);
+      }
+    }
+
+    return { resolvedStatements, invalidatedStatements };
+  }
+
+  /**
+   * Detect whether a new statement contradicts an existing statement.
+   * This supports the reified + temporal knowledge graph approach by detecting
+   * statement-level contradictions rather than edge-level contradictions.
+   */
+  private async detectContradiction(
+    newFact: string,
+    existingFact: string,
+    context?: { subject?: string; predicate?: string },
+  ): Promise<boolean> {
+    // Build the context for the contradiction-detection prompt
+    const promptContext = {
+      newFact,
+      existingFact,
+      subject: context?.subject || null,
+      predicate: context?.predicate || null,
+    };
+
+    // Build the contradiction-detection prompt.
+    // The prompt may need updating to handle reified statements specifically.
+    const messages = detectContradictionPrompt(promptContext);
+
+    let responseText = "";
+
+    await this.makeModelCall(
+      false,
+      LLMModelEnum.GPT41,
+      messages as CoreMessage[],
+      (text) => {
+        responseText = text;
+      },
+    );
+
+    try {
+      const result = JSON.parse(responseText);
+
+      // If we have a well-formed response with temporal information, use it
+      if (
+        result.temporalAnalysis &&
+        typeof result.temporalAnalysis === "object"
+      ) {
+        // Check whether the statements contradict based on temporal validity.
+        // This is important for the reified + temporal approach.
+        if (result.temporalAnalysis.areCompatible === false) {
+          return true; // This is a contradiction
+        }
+      }
+
+      // Fall back to the direct contradiction flag if temporal analysis isn't available
+      return result.isContradiction === true;
+    } catch (e) {
+      // Fall back to simple text parsing if JSON parsing fails
+      return (
+        responseText.toLowerCase().includes("true") ||
+        responseText.toLowerCase().includes("contradiction")
+      );
+    }
+  }
+
+  /**
+   * Extract additional attributes for nodes
+   */
+  private async extractAttributesFromNodes(
+    nodes: EntityNode[],
+    episode: EpisodicNode,
+    previousEpisodes: EpisodicNode[],
+  ): Promise<EntityNode[]> {
+    // This could involve an LLM call to extract more attributes for each node.
+    // For simplicity, we just return the nodes as is.
+    return nodes;
+  }
+
+  // The buildEpisodicEdges method was removed as part of the reified knowledge graph refactoring.
+  // In the reified model, episodes connect to entities through Statement nodes and HasProvenance edges.
+
+  /**
+   * Save all entities and statements to HelixDB using the reified structure.
+   * Creates statements plus HasSubject, HasObject, HasPredicate, and HasProvenance edges.
+   */
+  private async saveToHelixDB(
+    episode: EpisodicNode,
+    nodes: EntityNode[],
+    resolvedStatements: Triple[],
+    invalidatedStatements: Triple[],
+  ): Promise<void> {
+    try {
+      // 1. Save the episode first
+      await helixClient.query("saveEpisode", {
+        uuid: episode.uuid,
+        name: episode.name,
+        content: episode.content,
+        source: episode.source,
+        sourceDescription: episode.sourceDescription ?? "",
+        userId: episode.userId || null,
+        labels: episode.labels || [],
+        createdAt: episode.createdAt.toISOString(),
+        validAt: episode.validAt.toISOString(),
+        embedding: [], // An embedding could be added here if needed
+      });
+
+      // 2. Save or update all entity nodes
+      for (const node of nodes) {
+        await helixClient.query("saveEntity", {
+          uuid: node.uuid,
+          name: node.name,
+          summary: node.type, // Using the type as the summary
+          userId: node.userId || null,
+          createdAt: node.createdAt.toISOString(),
+          attributesJson: JSON.stringify(node.attributes || {}),
+          embedding: node.nameEmbedding || [],
+        });
+      }
+
+      // 3. Process all resolved statements
+      for (const triple of resolvedStatements) {
+        // Save the statement node first
+        await helixClient.query("saveStatement", {
+          uuid: triple.statement.uuid,
+          fact: triple.statement.fact,
+          groupId: triple.statement.groupId,
+          userId: triple.statement.userId || null,
+          createdAt: triple.statement.createdAt.toISOString(),
+          validAt: triple.statement.validAt.toISOString(),
+          invalidAt: triple.statement.invalidAt
+            ? triple.statement.invalidAt.toISOString()
+            : null,
+          attributesJson: triple.statement.attributesJson,
+          embedding: triple.statement.embedding || [],
+        });
+
+        // Create the HasSubject edge
+        await helixClient.query("createHasSubjectEdge", {
+          uuid: crypto.randomUUID(),
+          statementId: triple.statement.uuid,
+          entityId: triple.subject.uuid,
+          createdAt: new Date().toISOString(),
+        });
+
+        // Create the HasObject edge
+        await helixClient.query("createHasObjectEdge", {
+          uuid: crypto.randomUUID(),
+          statementId: triple.statement.uuid,
+          entityId: triple.object.uuid,
+          createdAt: new Date().toISOString(),
+        });
+
+        // Create the HasPredicate edge
+        await helixClient.query("createHasPredicateEdge", {
+          uuid: crypto.randomUUID(),
+          statementId: triple.statement.uuid,
+          entityId: triple.predicate.uuid,
+          createdAt: new Date().toISOString(),
+        });
+
+        // Create the HasProvenance edge to link the statement to its source episode
+        await helixClient.query("createHasProvenanceEdge", {
+          uuid: crypto.randomUUID(),
+          statementId: triple.statement.uuid,
+          episodeId: episode.uuid,
+          createdAt: new Date().toISOString(),
+        });
+      }
+
+      // 4. Handle invalidated statements (update them with the new invalidAt time)
+      for (const triple of invalidatedStatements) {
+        await helixClient.query("saveStatement", {
+          uuid: triple.statement.uuid,
+          fact: triple.statement.fact,
+          groupId: triple.statement.groupId,
+          userId: triple.statement.userId || null,
+          createdAt: triple.statement.createdAt.toISOString(),
+          validAt: triple.statement.validAt.toISOString(),
+          invalidAt: triple.statement.invalidAt!.toISOString(), // Always set here: this is the episode.validAt timestamp
+          attributesJson: triple.statement.attributesJson,
+          embedding: triple.statement.embedding || [],
+        });
+      }
+    } catch (error) {
+      logger.error("Error saving to HelixDB:", { error });
+      throw error;
+    }
+  }
+
+  private async makeModelCall(
+    stream: boolean,
+    model: LLMModelEnum,
+    messages: CoreMessage[],
+    onFinish: (text: string, model: string) => void,
+  ) {
+    let modelInstance;
+    let finalModel: string = "unknown";
+
+    switch (model) {
+      case LLMModelEnum.GPT35TURBO:
+      case LLMModelEnum.GPT4TURBO:
+      case LLMModelEnum.GPT4O:
+      case LLMModelEnum.GPT41:
+      case LLMModelEnum.GPT41MINI:
+      case LLMModelEnum.GPT41NANO:
+        finalModel = LLMMappings[model];
+        modelInstance = openai(finalModel);
+        break;
+
+      case LLMModelEnum.CLAUDEOPUS:
+      case LLMModelEnum.CLAUDESONNET:
+      case LLMModelEnum.CLAUDEHAIKU:
+        // TODO: wire up an Anthropic provider; only the model name is mapped for now
+        finalModel = LLMMappings[model];
+        break;
+
+      case LLMModelEnum.GEMINI25FLASH:
+      case LLMModelEnum.GEMINI25PRO:
+      case LLMModelEnum.GEMINI20FLASH:
+      case LLMModelEnum.GEMINI20FLASHLITE:
+        // TODO: wire up a Google provider; only the model name is mapped for now
+        finalModel = LLMMappings[model];
+        break;
+
+      default:
+        logger.warn(`Unsupported model type: ${model}`);
+        break;
+    }
+
+    if (!modelInstance) {
+      throw new Error(`No provider instance configured for model: ${model}`);
+    }
+
+    if (stream) {
+      return await streamText({
+        model: modelInstance as LanguageModelV1,
+        messages,
+        onFinish: async ({ text }) => {
+          onFinish(text, finalModel);
+        },
+      });
+    }
+
+    const { text } = await generateText({
+      model: modelInstance as LanguageModelV1,
+      messages,
+    });
+
+    onFinish(text, finalModel);
+
+    return text;
+  }
+}
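A minimal usage sketch of the service above (assumed values throughout; the
empty ingest route added at the top of this patch would eventually call
something like this):

  const kg = new KnowledgeGraphService();
  const result = await kg.addEpisode({
    name: "Standup notes",
    episodeBody: "Alice mentioned she now works at Acme.",
    referenceTime: new Date(),
    type: EpisodeType.Conversation,
    source: "slack", // hypothetical source label
    userId: "user_123", // hypothetical user id
  });
  // result: { episodeUuid, nodesCreated, statementsCreated, processingTimeMs }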
diff --git a/apps/webapp/app/services/prompts/contradiction.ts b/apps/webapp/app/services/prompts/contradiction.ts
new file mode 100644
index 0000000..46e4a18
--- /dev/null
+++ b/apps/webapp/app/services/prompts/contradiction.ts
@@ -0,0 +1,50 @@
+/**
+ * Prompts for detecting contradictions between facts
+ */
+
+import { type CoreMessage } from "ai";
+
+export interface ContradictionResult {
+  isContradiction: boolean;
+  explanation?: string;
+  resolution?: string;
+}
+
+/**
+ * Detect contradictions between facts
+ */
+export const detect = (context: Record<string, any>): CoreMessage[] => {
+  return [
+    {
+      role: "system",
+      content:
+        "You are an expert at detecting contradictions between facts in a knowledge graph.",
+    },
+    {
+      role: "user",
+      content: `
+<EXISTING FACT>
+${JSON.stringify(context.existingFact, null, 2)}
+</EXISTING FACT>
+
+<NEW FACT>
+${JSON.stringify(context.newFact, null, 2)}
+</NEW FACT>
+
+Determine if the NEW FACT contradicts the EXISTING FACT. A contradiction occurs when:
+1. Both facts cannot be simultaneously true
+2. The facts present mutually exclusive information about the same entities and relationship
+
+Respond with a JSON object containing:
+- "isContradiction": boolean (true if a contradiction exists)
+- "explanation": string (a brief explanation of the contradiction, if one exists)
+- "resolution": string (a suggested resolution approach, if applicable)
+
+Be careful to consider:
+- Temporal context (facts may be true at different times)
+- Different levels of specificity (one fact may be more detailed)
+- Different perspectives or interpretations
+`,
+    },
+  ];
+};
diff --git a/apps/webapp/app/services/prompts/extractEdges.ts b/apps/webapp/app/services/prompts/extractEdges.ts
new file mode 100644
index 0000000..2067ef6
--- /dev/null
+++ b/apps/webapp/app/services/prompts/extractEdges.ts
@@ -0,0 +1,166 @@
+/**
+ * Prompts for extracting relationships between entities
+ */
+
+import { type CoreMessage } from "ai";
+
+export interface Edge {
+  relationshipType: string;
+  sourceEntityName: string;
+  targetEntityName: string;
+  fact: string;
+  validAt?: string;
+  invalidAt?: string;
+}
+
+export interface ExtractedEdges {
+  edges: Edge[];
+}
+
+export interface MissingFacts {
+  missingFacts: string[];
+}
+
+/**
+ * Extract relationships between entities
+ */
+export const edge = (context: Record<string, any>): CoreMessage[] => {
+  return [
+    {
+      role: "system",
+      content:
+        "You are an expert fact extractor that extracts fact triples from text. " +
+        "1. Extracted fact triples should also be extracted with relevant date information. " +
+        "2. Treat the CURRENT TIME as the time the CURRENT EPISODE was created. All temporal information should be extracted relative to this time.",
+    },
+    {
+      role: "user",
+      content: `
+<PREVIOUS EPISODES>
+${JSON.stringify(context.previousEpisodes || [], null, 2)}
+</PREVIOUS EPISODES>
+
+<CURRENT EPISODE>
+${context.episodeContent}
+</CURRENT EPISODE>
+
+<ENTITIES>
+${JSON.stringify(context.nodes || [], null, 2)}
+</ENTITIES>
+
+<REFERENCE_TIME>
+${context.referenceTime} # ISO 8601 (UTC); used to resolve relative time mentions
+</REFERENCE_TIME>
+
+<RELATIONSHIP TYPES>
+${JSON.stringify(context.edgeTypes || {}, null, 2)}
+</RELATIONSHIP TYPES>
+
+# TASK
+Extract all factual relationships between the given ENTITIES based on the CURRENT EPISODE.
+Only extract facts that:
+- involve two DISTINCT ENTITIES from the ENTITIES list,
+- are clearly stated or unambiguously implied in the CURRENT EPISODE,
+  and can be represented as edges in a knowledge graph.
+- The RELATIONSHIP TYPES provide a list of the most important types of relationships; make sure to extract any facts that
+  could be classified into one of the provided relationship types.
+
+You may use information from the PREVIOUS EPISODES only to disambiguate references or support continuity.
+
+${context.customPrompt || ""}
+
+# EXTRACTION RULES
+
+1. Only emit facts where both the subject and object match entities in ENTITIES.
+2. Each fact must involve two **distinct** entities.
+3. Use a SCREAMING_SNAKE_CASE string as the \`relationshipType\` (e.g., FOUNDED, WORKS_AT).
+4. Do not emit duplicate or semantically redundant facts.
+5. The \`fact\` should quote or closely paraphrase the original source sentence(s).
+6. Use \`REFERENCE_TIME\` to resolve vague or relative temporal expressions (e.g., "last week").
+7. Do **not** hallucinate or infer temporal bounds from unrelated events.
+
+# DATETIME RULES
+
+- Use ISO 8601 with the "Z" suffix (UTC) (e.g., 2025-04-30T00:00:00Z).
+- If the fact is ongoing (present tense), set \`validAt\` to REFERENCE_TIME.
+- If a change/termination is expressed, set \`invalidAt\` to the relevant timestamp.
+- Leave both fields \`null\` if no explicit or resolvable time is stated.
+- If only a date is mentioned (no time), assume 00:00:00.
+- If only a year is mentioned, use January 1st at 00:00:00.
+
+Respond with a JSON object containing an "edges" array of objects, each with "relationshipType", "sourceEntityName", "targetEntityName", "fact", and optionally "validAt" and "invalidAt" properties.
+`,
+    },
+  ];
+};
+
+/**
+ * Check for missed facts
+ */
+export const reflexion = (context: Record<string, any>): CoreMessage[] => {
+  const sysPrompt = `You are an AI assistant that determines which facts have not been extracted from the given context`;
+
+  const userPrompt = `
+<PREVIOUS EPISODES>
+${JSON.stringify(context.previousEpisodes || [], null, 2)}
+</PREVIOUS EPISODES>
+
+<CURRENT EPISODE>
+${context.episodeContent}
+</CURRENT EPISODE>
+
+<EXTRACTED ENTITIES>
+${JSON.stringify(context.nodes || [], null, 2)}
+</EXTRACTED ENTITIES>
+
+<EXTRACTED FACTS>
+${JSON.stringify(context.extractedFacts || [], null, 2)}
+</EXTRACTED FACTS>
+
+Given the above EPISODES, list of EXTRACTED ENTITIES, and list of EXTRACTED FACTS,
+determine if any facts haven't been extracted. Respond with a JSON object containing a "missingFacts" array of strings.
+`;
+
+  return [
+    { role: "system", content: sysPrompt },
+    { role: "user", content: userPrompt },
+  ];
+};
+
+/**
+ * Extract additional attributes for edges
+ */
+export const extract_attributes = (
+  context: Record<string, any>,
+): CoreMessage[] => {
+  return [
+    {
+      role: "system",
+      content:
+        "You are a helpful assistant that extracts fact properties from the provided text.",
+    },
+    {
+      role: "user",
+      content: `
+<EPISODE>
+${JSON.stringify(context.episodeContent, null, 2)}
+</EPISODE>
+
+<REFERENCE TIME>
+${context.referenceTime}
+</REFERENCE TIME>
+
+Given the above EPISODE, its REFERENCE TIME, and the following FACT, update any of its attributes based on the information provided
+in the EPISODE. Use the provided attribute descriptions to better understand how each attribute should be determined.
+
+Guidelines:
+1. Do not hallucinate entity property values if they cannot be found in the current context.
+2. Only use the provided EPISODES and FACT to set attribute values.
+
+<FACT>
+${JSON.stringify(context.fact, null, 2)}
+</FACT>
+`,
+    },
+  ];
+};
diff --git a/apps/webapp/app/services/prompts/extractNodes.ts b/apps/webapp/app/services/prompts/extractNodes.ts
new file mode 100644
index 0000000..1b06510
--- /dev/null
+++ b/apps/webapp/app/services/prompts/extractNodes.ts
@@ -0,0 +1,220 @@
+/**
+ * Prompts for extracting entity nodes from episodes
+ */
+
+import { type CoreMessage } from "ai";
+
+export interface ExtractedEntity {
+  name: string;
+  type: string;
+  attributes?: Record<string, any>;
+}
+
+export interface ExtractedEntities {
+  entities: ExtractedEntity[];
+}
+
+export interface MissedEntities {
+  missedEntities: string[];
+}
+
+export interface EntityClassificationTriple {
+  uuid: string;
+  name: string;
+  type: string | null;
+}
+
+export interface EntityClassification {
+  entityClassifications: EntityClassificationTriple[];
+}
+
+/**
+ * Extract entities from an episode using a message-based approach
+ */
+export const extract_message = (
+  context: Record<string, any>,
+): CoreMessage[] => {
+  const sysPrompt = `You are an AI assistant that extracts entity nodes from conversational messages.
+Your primary task is to extract and classify significant entities mentioned in the conversation.`;
+
+  const userPrompt = `
+<PREVIOUS EPISODES>
+${JSON.stringify(context.previousEpisodes || [], null, 2)}
+</PREVIOUS EPISODES>
+
+<CURRENT EPISODE>
+${context.episodeContent}
+</CURRENT EPISODE>
+
+<ENTITY TYPES>
+${JSON.stringify(context.entityTypes || {}, null, 2)}
+</ENTITY TYPES>
+
+Instructions:
+
+You are given a conversation context and a CURRENT EPISODE. Your task is to extract **entity nodes** mentioned **explicitly or implicitly** in the CURRENT EPISODE.
+
+1. **Entity Identification**:
+   - Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the CURRENT EPISODE.
+   - **Exclude** entities mentioned only in the PREVIOUS EPISODES (they are for context only).
+
+2. **Entity Classification**:
+   - Use the descriptions in ENTITY TYPES to classify each extracted entity.
+   - Assign the appropriate type for each one.
+
+3. **Exclusions**:
+   - Do NOT extract entities representing relationships or actions.
+   - Do NOT extract dates, times, or other temporal information—these will be handled separately.
+
+4. **Formatting**:
+   - Be **explicit and unambiguous** in naming entities (e.g., use full names when available).
+
+${context.customPrompt || ""}
+`;
+
+  return [
+    { role: "system", content: sysPrompt },
+    { role: "user", content: userPrompt },
+  ];
+};
+
+/**
+ * Extract entities from text-based content
+ */
+export const extract_text = (context: Record<string, any>): CoreMessage[] => {
+  const sysPrompt = `You are an AI assistant that extracts entity nodes from text.
+Your primary task is to extract and classify the speaker and other significant entities mentioned in the provided text.`;
+
+  const userPrompt = `
+<TEXT>
+${context.episodeContent}
+</TEXT>
+
+<ENTITY TYPES>
+${JSON.stringify(context.entityTypes || {}, null, 2)}
+</ENTITY TYPES>
+
+Given the above text, extract entities from the TEXT that are explicitly or implicitly mentioned.
+For each entity extracted, also determine its entity type based on the provided ENTITY TYPES and their descriptions.
+Indicate the classified entity type by providing its entity_type_id.
+
+${context.customPrompt || ""}
+
+Guidelines:
+1. Extract significant entities, concepts, or actors mentioned in the conversation.
+2. Avoid creating nodes for relationships or actions.
+3. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
+4. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
+`;
+
+  return [
+    { role: "system", content: sysPrompt },
+    { role: "user", content: userPrompt },
+  ];
+};
+
+/**
+ * Extract entities from an episode using a JSON-based approach
+ */
+export const extract_json = (context: Record<string, any>): CoreMessage[] => {
+  const sysPrompt = `You are an AI assistant that extracts entity nodes from text.
+Your primary task is to extract and classify significant entities mentioned in the content.`;
+
+  const userPrompt = `
+<PREVIOUS EPISODES>
+${JSON.stringify(context.previousEpisodes || [], null, 2)}
+</PREVIOUS EPISODES>
+
+<CURRENT EPISODE>
+${context.episodeContent}
+</CURRENT EPISODE>
+
+<ENTITY TYPES>
+${JSON.stringify(context.entityTypes || {}, null, 2)}
+</ENTITY TYPES>
+
+Instructions:
+
+Extract all significant entities mentioned in the CURRENT EPISODE. For each entity, provide a name and type.
+Respond with a JSON object containing an "entities" array of objects, each with "name" and "type" properties.
+
+Guidelines:
+1. Extract significant entities, concepts, or actors mentioned in the content.
+2. Avoid creating nodes for relationships or actions.
+3. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
+4. Be as explicit as possible in your node names, using full names and avoiding abbreviations.
+
+${context.customPrompt || ""}
+`;
+
+  return [
+    { role: "system", content: sysPrompt },
+    { role: "user", content: userPrompt },
+  ];
+};
+
+/**
+ * Check for missed entities
+ */
+export const reflexion = (context: Record<string, any>): CoreMessage[] => {
+  const sysPrompt = `You are an AI assistant that determines which entities have not been extracted from the given context`;
+
+  const userPrompt = `
+<PREVIOUS EPISODES>
+${JSON.stringify(context.previousEpisodes || [], null, 2)}
+</PREVIOUS EPISODES>
+
+<CURRENT EPISODE>
+${context.episodeContent}
+</CURRENT EPISODE>
+
+<EXTRACTED ENTITIES>
+${JSON.stringify(context.extractedEntities || [], null, 2)}
+</EXTRACTED ENTITIES>
+
+Given the above previous episodes, current episode, and list of extracted entities, determine if any entities haven't been
+extracted. Respond with a JSON object containing a "missedEntities" array of strings.
+`;
+
+  return [
+    { role: "system", content: sysPrompt },
+    { role: "user", content: userPrompt },
+  ];
+};
+
+/**
+ * Extract additional attributes for entities
+ */
+export const extract_attributes = (
+  context: Record<string, any>,
+): CoreMessage[] => {
+  return [
+    {
+      role: "system",
+      content:
+        "You are a helpful assistant that extracts entity properties from the provided text.",
+    },
+    {
+      role: "user",
+      content: `
+<EPISODES>
+${JSON.stringify(context.previousEpisodes || [], null, 2)}
+${JSON.stringify(context.episodeContent, null, 2)}
+</EPISODES>
+
+Given the above EPISODES and the following ENTITY, update any of its attributes based on the information provided
+in the EPISODES. Use the provided attribute descriptions to better understand how each attribute should be determined.
+
+Guidelines:
+1. Do not hallucinate entity property values if they cannot be found in the current context.
+2. Only use the provided EPISODES and ENTITY to set attribute values.
+3. The summary attribute represents a summary of the ENTITY, and should be updated with new information about the Entity from the EPISODES.
+   Summaries must be no longer than 250 words.
+
+<ENTITY>
+${JSON.stringify(context.node, null, 2)}
+</ENTITY>
+`,
+    },
+  ];
+};
diff --git a/apps/webapp/app/services/prompts/extractStatements.ts b/apps/webapp/app/services/prompts/extractStatements.ts
new file mode 100644
index 0000000..e69de29
diff --git a/apps/webapp/app/services/prompts/index.ts b/apps/webapp/app/services/prompts/index.ts
new file mode 100644
index 0000000..d81cc73
--- /dev/null
+++ b/apps/webapp/app/services/prompts/index.ts
@@ -0,0 +1,8 @@
+/**
+ * Exports for the prompts module
+ */
+
+// Export types from the individual prompt modules
+export { type ExtractedEntity, type ExtractedEntities } from "./extractNodes";
+export { type Edge, type ExtractedEdges } from "./extractEdges";
+export { type ContradictionResult } from "./contradiction";
diff --git a/apps/webapp/app/services/prompts/models.ts b/apps/webapp/app/services/prompts/models.ts
new file mode 100644
index 0000000..01854e9
--- /dev/null
+++ b/apps/webapp/app/services/prompts/models.ts
@@ -0,0 +1,14 @@
+/**
+ * Models for the prompt system
+ */
+
+export interface Message {
+  role: 'system' | 'user' | 'assistant';
+  content: string;
+}
+
+export type PromptFunction = (context: Record<string, any>) => Message[];
+
+export interface PromptVersion {
+  [version: string]: (context: Record<string, any>) => Message[];
+}
diff --git a/apps/webapp/package.json b/apps/webapp/package.json
index b5b2f22..24a312e 100644
--- a/apps/webapp/package.json
+++ b/apps/webapp/package.json
@@ -11,9 +11,11 @@
     "typecheck": "tsc"
   },
   "dependencies": {
-    "@recall/database": "workspace:*",
+    "@ai-sdk/openai": "^1.3.21",
     "@opentelemetry/api": "1.9.0",
     "@radix-ui/react-slot": "^1.2.3",
+    "@recall/database": "workspace:*",
+    "@recall/types": "workspace:*",
     "@remix-run/express": "2.16.7",
     "@remix-run/node": "2.1.0",
     "@remix-run/react": "2.16.7",
@@ -21,26 +23,28 @@
     "@remix-run/serve": "2.16.7",
     "@remix-run/server-runtime": "2.16.7",
     "@remix-run/v1-meta": "^0.1.3",
-    "@tailwindcss/postcss": "^4.1.7",
     "@tailwindcss/container-queries": "^0.1.1",
-    "compression": "^1.7.4",
+    "@tailwindcss/postcss": "^4.1.7",
+    "ai": "4.3.14",
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",
+    "compression": "^1.7.4",
     "cross-env": "^7.0.3",
     "express": "^4.18.1",
+    "helix-ts": "^1.0.4",
     "isbot": "^4.1.0",
+    "lucide-react": "^0.511.0",
     "morgan": "^1.10.0",
     "nanoid": "3.3.8",
-    "lucide-react": "^0.511.0",
     "non.geist": "^1.0.2",
     "posthog-js": "^1.116.6",
     "react": "^18.2.0",
    "react-dom": "^18.2.0",
     "remix-auth": "^3.6.0",
     "remix-auth-google": "^2.0.0",
+    "remix-themes": "^1.3.1",
     "remix-typedjson": "0.3.1",
     "remix-utils": "^7.7.0",
-    "remix-themes": "^1.3.1",
     "tailwind-merge": "^2.6.0",
     "tailwind-scrollbar-hide": "^2.0.0",
     "tailwindcss-animate": "^1.0.7",
@@ -57,10 +61,9 @@
     "@tailwindcss/forms": "^0.5.10",
     "@tailwindcss/typography": "^0.5.16",
     "@tailwindcss/vite": "^4.1.7",
-    "@types/compression": "^1.7.2",
-    "@types/morgan": "^1.9.3",
     "@types/express": "^4.17.13",
+    "@types/morgan": "^1.9.3",
     "@types/react": "^18.2.20",
     "@types/react-dom": "^18.2.7",
     "@typescript-eslint/eslint-plugin": "^6.7.4",
@@ -68,12 +71,12 @@
     "autoprefixer": "^10.4.19",
     "css-loader": "^6.10.0",
     "esbuild": "^0.25.5",
-    "eslint-import-resolver-typescript": "^3.6.1",
-    "eslint-plugin-jsx-a11y": "^6.7.1",
-    "eslint-plugin-react": "^7.33.2",
     "eslint": "^8.24.0",
     "eslint-config-prettier": "^8.5.0",
+    "eslint-import-resolver-typescript": "^3.6.1",
     "eslint-plugin-import": "^2.29.1",
+    "eslint-plugin-jsx-a11y": "^6.7.1",
+    "eslint-plugin-react": "^7.33.2",
     "eslint-plugin-react-hooks": "^4.6.2",
     "eslint-plugin-turbo": "^2.0.4",
"^8.4.38", @@ -90,4 +93,4 @@ "engines": { "node": ">=20.0.0" } -} \ No newline at end of file +} diff --git a/helixdb-cfg/config.hx.json b/helixdb-cfg/config.hx.json new file mode 100644 index 0000000..3ae481f --- /dev/null +++ b/helixdb-cfg/config.hx.json @@ -0,0 +1,12 @@ + +{ + "vector_config": { + "m": 16, + "ef_construction": 128, + "ef_search": 768 + }, + "graph_config": { + "secondary_indices": [] + }, + "db_max_size_gb": 10 +} diff --git a/helixdb-cfg/queries.hx b/helixdb-cfg/queries.hx new file mode 100644 index 0000000..87bf430 --- /dev/null +++ b/helixdb-cfg/queries.hx @@ -0,0 +1,230 @@ +// Save an episode to the database +QUERY saveEpisode(uuid: String, name: String, content: String, source: String, + sourceDescription: String, userId: String, labels: [String], + createdAt: String, validAt: String, embedding: [F32]) => + episode <- AddV({ + uuid: uuid, + name: name, + content: content, + source: source, + sourceDescription: sourceDescription, + userId: userId, + labels: labels, + createdAt: createdAt, + validAt: validAt, + embedding: embedding + }) + RETURN episode + +// Get a specific episode by UUID +QUERY getEpisode(uuid: String) => + episode <- V(uuid) + RETURN episode + +// Get recent episodes with optional filters +QUERY getRecentEpisodes(referenceTime: String, limit: I32, userId: String, source: String) => + episodes <- V::WHERE(_::{validAt}::LTE(referenceTime)) + // Apply filters if provided + episodes <- IF userId != NULL THEN episodes::WHERE(_::{userId}::EQ(userId)) ELSE episodes + episodes <- IF source != NULL THEN episodes::WHERE(_::{source}::EQ(source)) ELSE episodes + // Sort and limit + episodes <- episodes::Sort({validAt: -1})::Limit(limit) + RETURN episodes + +// Save an entity node +QUERY saveEntity(uuid: String, name: String, summary: String, + userId: String, createdAt: String, attributesJson: String, embedding: [F32]) => + entity <- AddV({ + uuid: uuid, + name: name, + summary: summary, + userId: userId, + createdAt: createdAt, + attributesJson: attributesJson, + embedding: embedding + }) + RETURN entity + +// Get an entity by UUID +QUERY getEntity(uuid: String) => + entity <- V(uuid) + RETURN entity + +// Save a statement with temporal information +QUERY saveStatement(uuid: String, fact: String, groupId: String, userId: String, + createdAt: String, validAt: String, invalidAt: String, + attributesJson: String, embedding: [F32]) => + statement <- AddV({ + uuid: uuid, + fact: fact, + groupId: groupId, + userId: userId, + createdAt: createdAt, + validAt: validAt, + invalidAt: invalidAt, + attributesJson: attributesJson, + embedding: embedding + }) + RETURN statement + +// Create HasSubject edge +QUERY createHasSubjectEdge(uuid: String, statementId: String, entityId: String, createdAt: String) => + statement <- V(statementId) + entity <- V(entityId) + edge <- AddE::From(statement)::To(entity)({ + uuid: uuid, + createdAt: createdAt + }) + RETURN edge + +// Create HasObject edge +QUERY createHasObjectEdge(uuid: String, statementId: String, entityId: String, createdAt: String) => + statement <- V(statementId) + entity <- V(entityId) + edge <- AddE::From(statement)::To(entity)({ + uuid: uuid, + createdAt: createdAt + }) + RETURN edge + +// Create HasPredicate edge +QUERY createHasPredicateEdge(uuid: String, statementId: String, entityId: String, createdAt: String) => + statement <- V(statementId) + entity <- V(entityId) + edge <- AddE::From(statement)::To(entity)({ + uuid: uuid, + createdAt: createdAt + }) + RETURN edge + +// Create HasProvenance edge +QUERY 
+
+// Create HasProvenance edge
+QUERY createHasProvenanceEdge(uuid: String, statementId: String, episodeId: String, createdAt: String) =>
+    statement <- V(statementId)
+    episode <- V(episodeId)
+    edge <- AddE<HasProvenance>::From(statement)::To(episode)({
+        uuid: uuid,
+        createdAt: createdAt
+    })
+    RETURN edge
+
+// Get all statements for a subject entity
+QUERY getStatementsForSubject(entityId: String) =>
+    entity <- V(entityId)
+    statements <- entity::In<HasSubject>
+    RETURN statements
+
+// Get all statements for an object entity
+QUERY getStatementsForObject(entityId: String) =>
+    entity <- V(entityId)
+    statements <- entity::In<HasObject>
+    RETURN statements
+
+// Get all statements with a specific predicate
+QUERY getStatementsForPredicate(predicateId: String) =>
+    predicate <- V(predicateId)
+    statements <- predicate::In<HasPredicate>
+    RETURN statements
+
+// Get all statements from an episode
+QUERY getStatementsFromEpisode(episodeId: String) =>
+    episode <- V(episodeId)
+    statements <- episode::In<HasProvenance>
+    RETURN statements
+
+// Get the complete subject-predicate-object triple for a statement
+QUERY getTripleForStatement(statementId: String) =>
+    statement <- V(statementId)
+    subject <- statement::Out<HasSubject>
+    predicate <- statement::Out<HasPredicate>
+    object <- statement::Out<HasObject>
+    RETURN {
+        statement: statement,
+        subject: subject,
+        predicate: predicate,
+        object: object
+    }
+
+// Find all statements valid at a specific time
+QUERY getStatementsValidAtTime(timestamp: String, userId: String) =>
+    statements <- V<Statement>::WHERE(
+        AND(
+            _::{validAt}::LTE(timestamp),
+            OR(
+                _::{invalidAt}::GT(timestamp),
+                _::{invalidAt}::EQ(NULL)
+            )
+        )
+    )
+    // Filter by userId if provided
+    statements <- IF userId != NULL THEN
+        statements::WHERE(_::{userId}::EQ(userId))
+    ELSE
+        statements
+    RETURN statements
+
+// Find contradictory statements (same subject and predicate but different objects)
+QUERY findContradictoryStatements(subjectId: String, predicateId: String) =>
+    subject <- V(subjectId)
+    predicate <- V(predicateId)
+
+    // Get all statements that have this subject
+    statements <- subject::In<HasSubject>
+
+    // Filter to those with the specified predicate
+    statements <- statements::WHERE(
+        _::Out<HasPredicate>::ID()::EQ(predicateId)
+    )
+
+    // Keep only statements that are still valid
+    valid_statements <- statements::WHERE(
+        OR(
+            _::{invalidAt}::EQ(NULL),
+            _::{invalidAt}::GT(NOW())
+        )
+    )
+
+    RETURN valid_statements
+
+// Find semantically similar entities using vector embeddings
+QUERY findSimilarEntities(queryEmbedding: [F32], limit: I32, threshold: F32) =>
+    entities <- V<Entity>::Neighbor(queryEmbedding, threshold)::Limit(limit)
+    RETURN entities
+
+// Find semantically similar statements using vector embeddings
+QUERY findSimilarStatements(queryEmbedding: [F32], limit: I32, threshold: F32) =>
+    statements <- V<Statement>::Neighbor(queryEmbedding, threshold)::Limit(limit)
+    RETURN statements
+
+// Retrieve a complete knowledge triple (subject, predicate, object) with temporal information
+QUERY getTemporalTriple(statementId: String) =>
+    statement <- V(statementId)
+    subject <- statement::Out<HasSubject>
+    predicate <- statement::Out<HasPredicate>
+    object <- statement::Out<HasObject>
+    episode <- statement::Out<HasProvenance>
+
+    RETURN {
+        statement: {
+            id: statement::{uuid},
+            fact: statement::{fact},
+            validAt: statement::{validAt},
+            invalidAt: statement::{invalidAt},
+            attributesJson: statement::{attributesJson}
+        },
+        subject: {
+            id: subject::{uuid},
+            name: subject::{name}
+        },
+        predicate: {
+            id: predicate::{uuid},
+            name: predicate::{name}
+        },
+        object: {
+            id: object::{uuid},
+            name: object::{name}
+        },
+        provenance: {
+            id: episode::{uuid},
+            name: episode::{name}
+        }
+    }
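A hedged sketch of how the temporal queries above are consumed from the
service side (helix-ts exposes the generic query(name, params) call used
throughout knowledgeGraph.server.ts; the result shape is assumed):

  // Facts valid on 2025-01-01 for one (hypothetical) user: statements whose
  // validAt <= t and whose invalidAt is either NULL or still in the future.
  const valid = await helixClient.query("getStatementsValidAtTime", {
    timestamp: new Date("2025-01-01T00:00:00Z").toISOString(),
    userId: "user_123",
  });
  // Invalidated statements drop out automatically; no edges are ever deleted.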
diff --git a/helixdb-cfg/schema.hx b/helixdb-cfg/schema.hx
new file mode 100644
index 0000000..eaedea5
--- /dev/null
+++ b/helixdb-cfg/schema.hx
@@ -0,0 +1,78 @@
+// Knowledge Graph Schema: combines reified relationships with temporal graph memory
+// This schema implements a hybrid approach that allows for:
+// 1. Representing facts as first-class entities (reification)
+// 2. Tracking the temporal validity of information
+// 3. Maintaining provenance (where information came from)
+// 4. Supporting direct entity-to-entity relationships for performance
+
+
+V::Episode {
+    name: String,
+    content: String,
+    source: String,
+    type: String,
+    userId: String,
+    createdAt: DateTime,
+    validAt: DateTime,
+    labels: [String],
+    space: String,
+    sessionId: String
+}
+
+V::Entity {
+    name: String,
+    summary: String,
+    type: String,
+    createdAt: DateTime,
+    attributes: String,
+    userId: String,
+    space: String
+}
+
+// The Statement node is the core of reification - turning facts into first-class objects.
+// This allows tracking validity periods and provenance, and treating facts as objects themselves.
+V::Statement {
+    fact: String,
+    createdAt: DateTime,
+    validAt: DateTime,
+    invalidAt: DateTime,
+    attributes: String,
+    userId: String,
+    space: String
+}
+
+// Subject of the statement (the entity the statement is about)
+E::HasSubject {
+    To: Entity,
+    From: Statement,
+    Properties: {
+        createdAt: DateTime
+    }
+}
+
+// Object of the statement (the entity that receives the action or is related to)
+E::HasObject {
+    To: Entity,
+    From: Statement,
+    Properties: {
+        createdAt: DateTime
+    }
+}
+
+// Predicate of the statement (the relationship type or verb)
+E::HasPredicate {
+    To: Entity,
+    From: Statement,
+    Properties: {
+        createdAt: DateTime
+    }
+}
+
+// Provenance connection - links a statement to its source episode
+E::HasProvenance {
+    To: Episode,
+    From: Statement,
+    Properties: {
+        createdAt: DateTime
+    }
+}
\ No newline at end of file
diff --git a/packages/types/src/graph/graph.entity.ts b/packages/types/src/graph/graph.entity.ts
new file mode 100644
index 0000000..f857b82
--- /dev/null
+++ b/packages/types/src/graph/graph.entity.ts
@@ -0,0 +1,58 @@
+export enum EpisodeType {
+  Message = "message",
+  Code = "code",
+  Documentation = "documentation",
+}
+
+export interface AddEpisodeParams {
+  name: string;
+  episodeBody: string;
+  sourceDescription: string;
+  referenceTime: Date;
+  source?: EpisodeType;
+  userId?: string;
+  uuid?: string;
+}
+
+export interface AddEpisodeResult {
+  episodeUuid: string;
+  nodesCreated: number;
+  edgesCreated: number;
+  processingTimeMs: number;
+}
+
+export interface EntityNode {
+  uuid: string;
+  name: string;
+  type: string;
+  attributes?: Record<string, any>;
+  nameEmbedding?: number[];
+  createdAt: Date;
+  userId?: string;
+}
+
+export interface EntityEdge {
+  uuid: string;
+  source: string; // source node uuid
+  target: string; // target node uuid
+  relationship: string;
+  fact: string;
+  factEmbedding?: number[];
+  validAt: Date;
+  invalidAt?: Date;
+  isValid: boolean;
+  episodes: string[]; // episode uuids where this edge was mentioned
+  userId?: string;
+}
+
+export interface EpisodicNode {
+  uuid: string;
+  name: string;
+  content: string;
+  sourceDescription: string;
+  source: EpisodeType;
+  createdAt: Date;
+  validAt: Date;
+  entityEdges: string[]; // edge uuids
+  userId?: string;
+}
diff --git a/packages/types/src/graph/index.ts b/packages/types/src/graph/index.ts
new file mode 100644
index 0000000..1ca3928
--- /dev/null
+++ b/packages/types/src/graph/index.ts
@@ -0,0 +1 @@
+export *
from "./graph.entity"; diff --git a/packages/types/src/index.ts b/packages/types/src/index.ts index e69de29..555ffe9 100644 --- a/packages/types/src/index.ts +++ b/packages/types/src/index.ts @@ -0,0 +1,2 @@ +export * from "./llm"; +export * from "./graph"; diff --git a/packages/types/src/llm/index.ts b/packages/types/src/llm/index.ts new file mode 100644 index 0000000..e0080fe --- /dev/null +++ b/packages/types/src/llm/index.ts @@ -0,0 +1 @@ +export * from "./llm.entity"; diff --git a/packages/types/src/llm/llm.entity.ts b/packages/types/src/llm/llm.entity.ts new file mode 100644 index 0000000..df69d9a --- /dev/null +++ b/packages/types/src/llm/llm.entity.ts @@ -0,0 +1,73 @@ +export enum LLMModelEnum { + GPT35TURBO = "GPT35TURBO", + GPT4TURBO = "GPT4TURBO", + GPT4O = "GPT4O", + GPT41 = "GPT41", + GPT41MINI = "GPT41MINI", + GPT41NANO = "GPT41NANO", + LLAMA3 = "LLAMA3", + CLAUDEOPUS = "CLAUDEOPUS", + CLAUDESONNET = "CLAUDESONNET", + CLAUDEHAIKU = "CLAUDEHAIKU", + GEMINI25FLASH = "GEMINI25FLASH", + GEMINI25PRO = "GEMINI25PRO", + GEMINI20FLASH = "GEMINI20FLASH", + GEMINI20FLASHLITE = "GEMINI20FLASHLITE", +} + +export const LLMModelType = { + GPT35TURBO: "GPT35TURBO", + GPT4TURBO: "GPT4TURBO", + GPT4O: "GPT4O", + GPT41: "GPT41", + GPT41MINI: "GPT41MINI", + GPT41NANO: "GPT41NANO", + LLAMA3: "LLAMA3", + CLAUDEOPUS: "CLAUDEOPUS", + CLAUDESONNET: "CLAUDESONNET", + CLAUDEHAIKU: "CLAUDEHAIKU", + GEMINI25FLASH: "GEMINI25FLASH", + GEMINI25PRO: "GEMINI25PRO", + GEMINI20FLASH: "GEMINI20FLASH", + GEMINI20FLASHLITE: "GEMINI20FLASHLITE", +}; + +export enum LLMMappings { + GPT35TURBO = "gpt-3.5-turbo", + GPT4TURBO = "gpt-4-turbo", + GPT4O = "gpt-4o", + GPT41 = "gpt-4.1-2025-04-14", + GPT41MINI = "gpt-4.1-mini-2025-04-14", + GPT41NANO = "gpt-4.1-nano-2025-04-14", + LLAMA3 = "llama3", + CLAUDEOPUS = "claude-3-opus-20240229", + CLAUDESONNET = "claude-3-7-sonnet-20250219", + CLAUDEHAIKU = "claude-3-5-haiku-20241022", + GEMINI25FLASH = "gemini-2.5-flash-preview-04-17", + GEMINI25PRO = "gemini-2.5-pro-preview-03-25", + GEMINI20FLASH = "gemini-2.0-flash", + GEMINI20FLASHLITE = "gemini-2.0-flash-lite", +} + +export const OpenAIModels = [ + LLMModelEnum.GPT35TURBO, + LLMModelEnum.GPT4TURBO, + LLMModelEnum.GPT4O, + LLMModelEnum.GPT41, + LLMModelEnum.GPT41MINI, + LLMModelEnum.GPT41NANO, +]; +export const ClaudeModels = [ + LLMModelEnum.CLAUDEOPUS, + LLMModelEnum.CLAUDESONNET, + LLMModelEnum.CLAUDEHAIKU, +]; + +export const GeminiModels = [ + LLMModelEnum.GEMINI25FLASH, + LLMModelEnum.GEMINI25PRO, + LLMModelEnum.GEMINI20FLASH, + LLMModelEnum.GEMINI20FLASHLITE, +]; + +export type LLMModelType = (typeof LLMModelType)[keyof typeof LLMModelType]; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a0bacdd..497bde7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -30,6 +30,9 @@ importers: apps/webapp: dependencies: + '@ai-sdk/openai': + specifier: ^1.3.21 + version: 1.3.22(zod@3.23.8) '@opentelemetry/api': specifier: 1.9.0 version: 1.9.0 @@ -39,6 +42,9 @@ importers: '@recall/database': specifier: workspace:* version: link:../../packages/database + '@recall/types': + specifier: workspace:* + version: link:../../packages/types '@remix-run/express': specifier: 2.16.7 version: 2.16.7(express@4.21.2)(typescript@5.8.3) @@ -66,6 +72,9 @@ importers: '@tailwindcss/postcss': specifier: ^4.1.7 version: 4.1.7 + ai: + specifier: 4.3.14 + version: 4.3.14(react@18.3.1)(zod@3.23.8) class-variance-authority: specifier: ^0.7.1 version: 0.7.1 @@ -81,6 +90,9 @@ importers: express: specifier: ^4.18.1 version: 4.21.2 + helix-ts: + specifier: 
^1.0.4 + version: 1.0.4 isbot: specifier: ^4.1.0 version: 4.4.0 @@ -264,8 +276,62 @@ importers: specifier: 6.0.1 version: 6.0.1 + packages/types: + dependencies: + '@prisma/client': + specifier: 5.4.1 + version: 5.4.1(prisma@5.4.1) + devDependencies: + esbuild: + specifier: ^0.15.10 + version: 0.15.18 + prisma: + specifier: 5.4.1 + version: 5.4.1 + rimraf: + specifier: 6.0.1 + version: 6.0.1 + packages: + '@ai-sdk/openai@1.3.22': + resolution: {integrity: sha512-QwA+2EkG0QyjVR+7h6FE7iOu2ivNqAVMm9UJZkVxxTk5OIq5fFJDTEI/zICEMuHImTTXR2JjsL6EirJ28Jc4cw==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.0.0 + + '@ai-sdk/provider-utils@2.2.7': + resolution: {integrity: sha512-kM0xS3GWg3aMChh9zfeM+80vEZfXzR3JEUBdycZLtbRZ2TRT8xOj3WodGHPb06sUK5yD7pAXC/P7ctsi2fvUGQ==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.23.8 + + '@ai-sdk/provider-utils@2.2.8': + resolution: {integrity: sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.23.8 + + '@ai-sdk/provider@1.1.3': + resolution: {integrity: sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg==} + engines: {node: '>=18'} + + '@ai-sdk/react@1.2.11': + resolution: {integrity: sha512-+kPqLkJ3TWP6czaJPV+vzAKSUcKQ1598BUrcLHt56sH99+LhmIIW3ylZp0OfC3O6TR3eO1Lt0Yzw4R0mK6g9Gw==} + engines: {node: '>=18'} + peerDependencies: + react: ^18 || ^19 || ^19.0.0-rc + zod: ^3.23.8 + peerDependenciesMeta: + zod: + optional: true + + '@ai-sdk/ui-utils@1.2.10': + resolution: {integrity: sha512-GUj+LBoAlRQF1dL/M49jtufGqtLOMApxTpCmVjoRpIPt/dFALVL9RfqfvxwztyIwbK+IxGzcYjSGRsrWrj+86g==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.23.8 + '@alloc/quick-lru@5.2.0': resolution: {integrity: sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==} engines: {node: '>=10'} @@ -1477,6 +1543,9 @@ packages: '@types/debug@4.1.12': resolution: {integrity: sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==} + '@types/diff-match-patch@1.0.36': + resolution: {integrity: sha512-xFdR6tkm0MWvBfO8xXCSsinYxHcqkQUlcHeSpMC2ukzOb6lwQAfDmW+Qt0AvlGd8HpsS28qKsB+oPeJn9I39jg==} + '@types/estree-jsx@1.0.5': resolution: {integrity: sha512-52CcUVNFyfb1A2ALocQw/Dd1BQFNmSdkuC3BkZ6iqhdMfQz7JWOFRuJFloOzjk+6WijU56m9oKXFAXc7o3Towg==} @@ -1819,6 +1888,16 @@ packages: resolution: {integrity: sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA==} engines: {node: '>=8'} + ai@4.3.14: + resolution: {integrity: sha512-YAL7T7OIf6+nr0rT3kB+W4UU8lw3QZH+xtGud7sdOJHFufdn+4K5xSO3isXAM+5sxG0RgR4G9uD0ZoLPzuRTGg==} + engines: {node: '>=18'} + peerDependencies: + react: ^18 || ^19 || ^19.0.0-rc + zod: ^3.23.8 + peerDependenciesMeta: + react: + optional: true + ajv@6.12.6: resolution: {integrity: sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==} @@ -2056,6 +2135,10 @@ packages: resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==} engines: {node: '>=10'} + chalk@5.4.1: + resolution: {integrity: sha512-zgVZuo2WcZgfUEmsn6eO3kINexW8RAE4maiQ8QNs8CtpPCSyMiYsULR3HQYkm3w8FIA3SberyMJMSldGsW+U3w==} + engines: {node: ^12.17.0 || ^14.13 || >=16.0.0} + character-entities-html4@2.1.0: resolution: {integrity: sha512-1v7fgQRj6hnSwFpq1Eu0ynr/CDEw0rXo2B61qXrLNdHZmPKgb7fqS1a2JwF0rISo9q77jDI8VMEHoApn8qDoZA==} @@ -2379,6 +2462,9 @@ packages: 
     engines: {node: '>=0.8.0'}
     hasBin: true

+  diff-match-patch@1.0.5:
+    resolution: {integrity: sha512-IayShXAgj/QMXgB0IWmKx+rOPuGMhqm5w6jvFxmVenXKIzRqTAAsbBPT3kWQeGANj3jGgvcvv4yK6SxqYmikgw==}
+
   diff@5.2.0:
     resolution: {integrity: sha512-uIFDxqpRZGZ6ThOk84hEfqWoHx2devRFvpTZcTHur85vImfaxUbTW9Ryh4CpCuDnToOP1CEtXKIgytHBPVff5A==}
     engines: {node: '>=0.3.1'}
@@ -3162,6 +3248,9 @@
   hast-util-whitespace@2.0.1:
     resolution: {integrity: sha512-nAxA0v8+vXSBDt3AnRUNjyRIQ0rD+ntpbAp4LnPkumc5M9yUbSMa4XDU9Q6etY4f1Wp4bNgvc1yjiZtsTTrSng==}

+  helix-ts@1.0.4:
+    resolution: {integrity: sha512-mugPlPyOMOTwEb4Dyl7y38eV5CTCrJhIHsHny5SmVJP3q4F1fcPRfYCpyJPPCpJ2tIJMGQVsPKC5M0p76vcdUg==}
+
   hosted-git-info@2.8.9:
     resolution: {integrity: sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==}
@@ -3478,6 +3567,9 @@
   json-schema-traverse@0.4.1:
     resolution: {integrity: sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==}

+  json-schema@0.4.0:
+    resolution: {integrity: sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==}
+
   json-stable-stringify-without-jsonify@1.0.1:
     resolution: {integrity: sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==}
@@ -3490,6 +3582,11 @@
     engines: {node: '>=6'}
     hasBin: true

+  jsondiffpatch@0.6.0:
+    resolution: {integrity: sha512-3QItJOXp2AP1uv7waBkao5nCvhEv+QmJAd38Ybq7wNI74Q+BBmnLn4EDKz6yI9xGAIQoUF87qHt+kc1IVxB4zQ==}
+    engines: {node: ^18.0.0 || >=20.0.0}
+    hasBin: true
+
   jsonfile@4.0.0:
     resolution: {integrity: sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg==}
@@ -4809,6 +4906,9 @@
   scheduler@0.23.2:
     resolution: {integrity: sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==}

+  secure-json-parse@2.7.0:
+    resolution: {integrity: sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw==}
+
   semver@5.7.2:
     resolution: {integrity: sha512-cBznnQ9KjJqU67B52RMC65CMarK2600WFnbkcaiwWq3xy/5haFJlshgnpjovMVJ+Hff49d8GEn0b87C5pDQ10g==}
     hasBin: true
@@ -5048,6 +5148,11 @@
     resolution: {integrity: sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==}
     engines: {node: '>= 0.4'}

+  swr@2.3.3:
+    resolution: {integrity: sha512-dshNvs3ExOqtZ6kJBaAsabhPdHyeY4P2cKwRCniDVifBMoG/SVI7tfLWqPXriVspf2Rg4tPzXJTnwaihIeFw2A==}
+    peerDependencies:
+      react: ^16.11.0 || ^17.0.0 || ^18.0.0 || ^19.0.0
+
   tailwind-merge@2.6.0:
     resolution: {integrity: sha512-P+Vu1qXfzediirmHOC3xKGAYeZtPcV9g76X+xg2FD4tYgR71ewMA35Y3sCz3zhiN/dwefRpJX0yBcgwi1fXNQA==}
@@ -5104,6 +5209,10 @@
   text-table@0.2.0:
     resolution: {integrity: sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==}

+  throttleit@2.1.0:
+    resolution: {integrity: sha512-nt6AMGKW1p/70DF/hGBdJB57B8Tspmbp5gfJ8ilhLnt7kkr2ye7hzD6NVG8GGErk2HWF34igrL2CXmNIkzKqKw==}
+    engines: {node: '>=18'}
+
   through2@2.0.5:
     resolution: {integrity: sha512-/mrRod8xqpA+IHSLyGCQ2s8SPHiCDEeQJSep1jqLYeEUClOFG2Qsh+4FU6G9VeqpZnGW/Su8LQGc4YKni5rYSQ==}
@@ -5346,6 +5455,11 @@
   uri-js@4.4.1:
     resolution: {integrity: sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==}

+  use-sync-external-store@1.5.0:
+    resolution: {integrity: sha512-Rb46I4cGGVBmjamjphe8L/UnvJD+uPPtTkNvX5mZgqdbavhI4EbgIWJiIHXJ8bc/i9EQGPRh4DwEURJ552Do0A==}
+    peerDependencies:
+      react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0
+
   util-deprecate@1.0.2:
     resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==}
@@ -5622,6 +5736,11 @@
   zod-error@1.5.0:
     resolution: {integrity: sha512-zzopKZ/skI9iXpqCEPj+iLCKl9b88E43ehcU+sbRoHuwGd9F1IDVGQ70TyO6kmfiRL1g4IXkjsXK+g1gLYl4WQ==}

+  zod-to-json-schema@3.24.5:
+    resolution: {integrity: sha512-/AuWwMP+YqiPbsJx5D6TfgRTc4kTLjsh5SOcd4bLsfUg2RcEXrFMJl1DGgdHy2aCfsIA/cr/1JM0xcB2GZji8g==}
+    peerDependencies:
+      zod: ^3.24.1
+
   zod-validation-error@1.5.0:
     resolution: {integrity: sha512-/7eFkAI4qV0tcxMBB/3+d2c1P6jzzZYdYSlBuAklzMuCrJu5bzJfHS0yVAS87dRHVlhftd6RFJDIvv03JgkSbw==}
     engines: {node: '>=16.0.0'}
@@ -5636,6 +5755,47 @@

 snapshots:

+  '@ai-sdk/openai@1.3.22(zod@3.23.8)':
+    dependencies:
+      '@ai-sdk/provider': 1.1.3
+      '@ai-sdk/provider-utils': 2.2.8(zod@3.23.8)
+      zod: 3.23.8
+
+  '@ai-sdk/provider-utils@2.2.7(zod@3.23.8)':
+    dependencies:
+      '@ai-sdk/provider': 1.1.3
+      nanoid: 3.3.8
+      secure-json-parse: 2.7.0
+      zod: 3.23.8
+
+  '@ai-sdk/provider-utils@2.2.8(zod@3.23.8)':
+    dependencies:
+      '@ai-sdk/provider': 1.1.3
+      nanoid: 3.3.8
+      secure-json-parse: 2.7.0
+      zod: 3.23.8
+
+  '@ai-sdk/provider@1.1.3':
+    dependencies:
+      json-schema: 0.4.0
+
+  '@ai-sdk/react@1.2.11(react@18.3.1)(zod@3.23.8)':
+    dependencies:
+      '@ai-sdk/provider-utils': 2.2.7(zod@3.23.8)
+      '@ai-sdk/ui-utils': 1.2.10(zod@3.23.8)
+      react: 18.3.1
+      swr: 2.3.3(react@18.3.1)
+      throttleit: 2.1.0
+    optionalDependencies:
+      zod: 3.23.8
+
+  '@ai-sdk/ui-utils@1.2.10(zod@3.23.8)':
+    dependencies:
+      '@ai-sdk/provider': 1.1.3
+      '@ai-sdk/provider-utils': 2.2.7(zod@3.23.8)
+      zod: 3.23.8
+      zod-to-json-schema: 3.24.5(zod@3.23.8)
+
   '@alloc/quick-lru@5.2.0': {}

   '@ampproject/remapping@2.3.0':
@@ -6922,6 +7082,8 @@
     dependencies:
       '@types/ms': 2.1.0

+  '@types/diff-match-patch@1.0.36': {}
+
   '@types/estree-jsx@1.0.5':
     dependencies:
       '@types/estree': 1.0.7
@@ -7324,6 +7486,18 @@
      clean-stack: 2.2.0
      indent-string: 4.0.0

+  ai@4.3.14(react@18.3.1)(zod@3.23.8):
+    dependencies:
+      '@ai-sdk/provider': 1.1.3
+      '@ai-sdk/provider-utils': 2.2.7(zod@3.23.8)
+      '@ai-sdk/react': 1.2.11(react@18.3.1)(zod@3.23.8)
+      '@ai-sdk/ui-utils': 1.2.10(zod@3.23.8)
+      '@opentelemetry/api': 1.9.0
+      jsondiffpatch: 0.6.0
+      zod: 3.23.8
+    optionalDependencies:
+      react: 18.3.1
+
   ajv@6.12.6:
     dependencies:
       fast-deep-equal: 3.1.3
@@ -7610,6 +7784,8 @@
      ansi-styles: 4.3.0
      supports-color: 7.2.0

+  chalk@5.4.1: {}
+
   character-entities-html4@2.1.0: {}

   character-entities-legacy@3.0.0: {}
@@ -7905,6 +8081,8 @@
      defined: 1.0.1
      minimist: 1.2.8

+  diff-match-patch@1.0.5: {}
+
   diff@5.2.0: {}

   dir-glob@3.0.1:
@@ -8968,6 +9146,8 @@
   hast-util-whitespace@2.0.1: {}

+  helix-ts@1.0.4: {}
+
   hosted-git-info@2.8.9: {}

   hosted-git-info@6.1.3:
@@ -9249,6 +9429,8 @@
   json-schema-traverse@0.4.1: {}

+  json-schema@0.4.0: {}
+
   json-stable-stringify-without-jsonify@1.0.1: {}

   json5@1.0.2:
@@ -9257,6 +9439,12 @@
   json5@2.2.3: {}

+  jsondiffpatch@0.6.0:
+    dependencies:
+      '@types/diff-match-patch': 1.0.36
+      chalk: 5.4.1
+      diff-match-patch: 1.0.5
+
   jsonfile@4.0.0:
     optionalDependencies:
       graceful-fs: 4.2.11
@@ -10707,6 +10895,8 @@
     dependencies:
       loose-envify: 1.4.0

+  secure-json-parse@2.7.0: {}
+
   semver@5.7.2: {}

   semver@6.3.1: {}
@@ -10995,6 +11185,12 @@
   supports-preserve-symlinks-flag@1.0.0: {}

+  swr@2.3.3(react@18.3.1):
+    dependencies:
+      dequal: 2.0.3
+      react: 18.3.1
+      use-sync-external-store: 1.5.0(react@18.3.1)
+
   tailwind-merge@2.6.0: {}

   tailwind-scrollbar-hide@2.0.0(tailwindcss@4.1.7):
@@ -11082,6 +11278,8 @@
   text-table@0.2.0: {}

+  throttleit@2.1.0: {}
+
   through2@2.0.5:
     dependencies:
       readable-stream: 2.3.8
@@ -11343,6 +11541,10 @@
     dependencies:
       punycode: 2.3.1

+  use-sync-external-store@1.5.0(react@18.3.1):
+    dependencies:
+      react: 18.3.1
+
   util-deprecate@1.0.2: {}

   util@0.12.5:
@@ -11624,6 +11826,10 @@
     dependencies:
       zod: 3.23.8

+  zod-to-json-schema@3.24.5(zod@3.23.8):
+    dependencies:
+      zod: 3.23.8
+
   zod-validation-error@1.5.0(zod@3.23.8):
     dependencies:
       zod: 3.23.8
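
Note (illustrative, not part of the patch): the llm.entity.ts module added above pairs each LLMModelEnum member with a provider-side model id via LLMMappings, and groups members by provider through the OpenAIModels, ClaudeModels, and GeminiModels arrays. A minimal sketch of how a consumer of @recall/types might resolve both pieces from these exports; the resolveModel helper and ProviderFamily type are hypothetical and do not appear in this diff:

import {
  ClaudeModels,
  GeminiModels,
  LLMMappings,
  LLMModelEnum,
  OpenAIModels,
} from "@recall/types";

type ProviderFamily = "openai" | "anthropic" | "google" | "other";

// Hypothetical helper (not in this diff): map an enum member to the
// wire-format model id from LLMMappings and to a provider family derived
// from the exported model groups.
function resolveModel(model: LLMModelEnum): { id: string; provider: ProviderFamily } {
  // LLMModelEnum values mirror the LLMMappings keys, so the runtime lookup
  // is safe; the double assertion is only there to satisfy the compiler,
  // since the two enums are distinct nominal types.
  const id = LLMMappings[model as unknown as keyof typeof LLMMappings];
  if (OpenAIModels.includes(model)) return { id, provider: "openai" };
  if (ClaudeModels.includes(model)) return { id, provider: "anthropic" };
  if (GeminiModels.includes(model)) return { id, provider: "google" };
  return { id, provider: "other" }; // e.g. LLAMA3 belongs to none of the groups
}

// Example: resolveModel(LLMModelEnum.GPT41MINI)
// -> { id: "gpt-4.1-mini-2025-04-14", provider: "openai" }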