From 1db2628af44f468c34b0b1d78fedb5c86d20cfc7 Mon Sep 17 00:00:00 2001 From: Harshith Mullapudi Date: Mon, 20 Oct 2025 12:18:14 +0530 Subject: [PATCH] fix: remove attribute login --- .../app/services/knowledgeGraph.server.ts | 265 ++++++++---------- apps/webapp/app/services/prompts/nodes.ts | 76 +---- 2 files changed, 128 insertions(+), 213 deletions(-) diff --git a/apps/webapp/app/services/knowledgeGraph.server.ts b/apps/webapp/app/services/knowledgeGraph.server.ts index 9a7ace8..c8ac547 100644 --- a/apps/webapp/app/services/knowledgeGraph.server.ts +++ b/apps/webapp/app/services/knowledgeGraph.server.ts @@ -12,11 +12,7 @@ import { import { logger } from "./logger.service"; import { ClusteringService } from "./clustering.server"; import crypto from "crypto"; -import { - dedupeNodes, - extractAttributes, - extractEntities, -} from "./prompts/nodes"; +import { dedupeNodes, extractEntities } from "./prompts/nodes"; import { extractStatements, extractStatementsOSS, @@ -40,7 +36,11 @@ import { saveTriple, searchStatementsByEmbedding, } from "./graphModels/statement"; -import { getEmbedding, makeModelCall, isProprietaryModel } from "~/lib/model.server"; +import { + getEmbedding, + makeModelCall, + isProprietaryModel, +} from "~/lib/model.server"; import { runQuery } from "~/lib/neo4j.server"; import { Apps, getNodeTypesString } from "~/utils/presets/nodes"; import { normalizePrompt, normalizeDocumentPrompt } from "./prompts"; @@ -419,8 +419,8 @@ export class KnowledgeGraphService { logger.log(`Processing time: ${processingTimeMs} ms`); // Count only truly new statements (exclude duplicates) - const newStatementsCount = updatedTriples.filter(triple => - triple.statement.createdAt >= episode.createdAt + const newStatementsCount = updatedTriples.filter( + (triple) => triple.statement.createdAt >= episode.createdAt, ).length; return { @@ -442,7 +442,10 @@ export class KnowledgeGraphService { private async extractEntities( episode: EpisodicNode, previousEpisodes: EpisodicNode[], - tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } }, + tokenMetrics: { + high: { input: number; output: number; total: number }; + low: { input: number; output: number; total: number }; + }, ): Promise { // Use the prompt library to get the appropriate prompts const context = { @@ -460,14 +463,20 @@ export class KnowledgeGraphService { let responseText = ""; // Entity extraction requires HIGH complexity (creative reasoning, nuanced NER) - await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => { - responseText = text; - if (usage) { - tokenMetrics.high.input += usage.promptTokens; - tokenMetrics.high.output += usage.completionTokens; - tokenMetrics.high.total += usage.totalTokens; - } - }, undefined, 'high'); + await makeModelCall( + false, + messages as CoreMessage[], + (text, _model, usage) => { + responseText = text; + if (usage) { + tokenMetrics.high.input += usage.promptTokens; + tokenMetrics.high.output += usage.completionTokens; + tokenMetrics.high.total += usage.totalTokens; + } + }, + undefined, + "high", + ); // Convert to EntityNode objects let entities: EntityNode[] = []; @@ -478,19 +487,23 @@ export class KnowledgeGraphService { responseText = outputMatch[1].trim(); const parsedResponse = JSON.parse(responseText || "[]"); // Handle both old format {entities: [...]} and new format [...] - const extractedEntities = Array.isArray(parsedResponse) ? parsedResponse : (parsedResponse.entities || []); + const extractedEntities = Array.isArray(parsedResponse) + ? parsedResponse + : parsedResponse.entities || []; // Batch generate embeddings for entity names - const entityNames = Array.isArray(extractedEntities[0]) ? extractedEntities : extractedEntities.map((entity: any) => entity.name || entity); + const entityNames = Array.isArray(extractedEntities[0]) + ? extractedEntities + : extractedEntities.map((entity: any) => entity.name || entity); const nameEmbeddings = await Promise.all( entityNames.map((name: string) => this.getEmbedding(name)), ); entities = extractedEntities.map((entity: any, index: number) => ({ uuid: crypto.randomUUID(), - name: typeof entity === 'string' ? entity : entity.name, + name: typeof entity === "string" ? entity : entity.name, type: undefined, // Type will be inferred from statements - attributes: typeof entity === 'string' ? {} : (entity.attributes || {}), + attributes: typeof entity === "string" ? {} : entity.attributes || {}, nameEmbedding: nameEmbeddings[index], typeEmbedding: undefined, // No type embedding needed createdAt: new Date(), @@ -512,7 +525,10 @@ export class KnowledgeGraphService { expanded: EntityNode[]; }, previousEpisodes: EpisodicNode[], - tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } }, + tokenMetrics: { + high: { input: number; output: number; total: number }; + low: { input: number; output: number; total: number }; + }, ): Promise { // Use the prompt library to get the appropriate prompts const context = { @@ -534,22 +550,28 @@ export class KnowledgeGraphService { referenceTime: episode.validAt.toISOString(), }; - console.log("proprietary model", isProprietaryModel(undefined, 'high')); + console.log("proprietary model", isProprietaryModel(undefined, "high")); // Statement extraction requires HIGH complexity (causal reasoning, emotional context) // Choose between proprietary and OSS prompts based on model type - const messages = isProprietaryModel(undefined, 'high') + const messages = isProprietaryModel(undefined, "high") ? extractStatements(context) : extractStatementsOSS(context); let responseText = ""; - await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => { - responseText = text; - if (usage) { - tokenMetrics.high.input += usage.promptTokens; - tokenMetrics.high.output += usage.completionTokens; - tokenMetrics.high.total += usage.totalTokens; - } - }, undefined, 'high'); + await makeModelCall( + false, + messages as CoreMessage[], + (text, _model, usage) => { + responseText = text; + if (usage) { + tokenMetrics.high.input += usage.promptTokens; + tokenMetrics.high.output += usage.completionTokens; + tokenMetrics.high.total += usage.totalTokens; + } + }, + undefined, + "high", + ); const outputMatch = responseText.match(/([\s\S]*?)<\/output>/); if (outputMatch && outputMatch[1]) { @@ -561,9 +583,11 @@ export class KnowledgeGraphService { // Parse the statements from the LLM response const parsedResponse = JSON.parse(responseText || "[]"); // Handle both old format {"edges": [...]} and new format [...] - const extractedTriples: ExtractedTripleData[] = Array.isArray(parsedResponse) + const extractedTriples: ExtractedTripleData[] = Array.isArray( + parsedResponse, + ) ? parsedResponse - : (parsedResponse.edges || []); + : parsedResponse.edges || []; console.log(`extracted triples length: ${extractedTriples.length}`); @@ -683,7 +707,10 @@ export class KnowledgeGraphService { triples: Triple[], episode: EpisodicNode, previousEpisodes: EpisodicNode[], - tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } }, + tokenMetrics: { + high: { input: number; output: number; total: number }; + low: { input: number; output: number; total: number }; + }, ): Promise { // Step 1: Extract unique entities from triples const uniqueEntitiesMap = new Map(); @@ -810,14 +837,20 @@ export class KnowledgeGraphService { let responseText = ""; // Entity deduplication is LOW complexity (pattern matching, similarity comparison) - await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => { - responseText = text; - if (usage) { - tokenMetrics.low.input += usage.promptTokens; - tokenMetrics.low.output += usage.completionTokens; - tokenMetrics.low.total += usage.totalTokens; - } - }, undefined, 'low'); + await makeModelCall( + false, + messages as CoreMessage[], + (text, _model, usage) => { + responseText = text; + if (usage) { + tokenMetrics.low.input += usage.promptTokens; + tokenMetrics.low.output += usage.completionTokens; + tokenMetrics.low.total += usage.totalTokens; + } + }, + undefined, + "low", + ); // Step 5: Process LLM response const outputMatch = responseText.match(/([\s\S]*?)<\/output>/); @@ -898,7 +931,10 @@ export class KnowledgeGraphService { triples: Triple[], episode: EpisodicNode, previousEpisodes: EpisodicNode[], - tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } }, + tokenMetrics: { + high: { input: number; output: number; total: number }; + low: { input: number; output: number; total: number }; + }, ): Promise<{ resolvedStatements: Triple[]; invalidatedStatements: string[]; @@ -911,7 +947,10 @@ export class KnowledgeGraphService { } // Step 1: Collect all potential matches for all triples at once - const allPotentialMatches: Map[]> = new Map(); + const allPotentialMatches: Map< + string, + Omit[] + > = new Map(); const allExistingTripleData: Map = new Map(); // For preparing the LLM context @@ -971,7 +1010,8 @@ export class KnowledgeGraphService { } // Phase 3: Check related memories for contradictory statements - const previousEpisodesStatements: Omit[] = []; + const previousEpisodesStatements: Omit[] = + []; await Promise.all( previousEpisodes.map(async (episode) => { @@ -1052,14 +1092,20 @@ export class KnowledgeGraphService { let responseText = ""; // Statement resolution is LOW complexity (rule-based duplicate/contradiction detection) - await makeModelCall(false, messages, (text, _model, usage) => { - responseText = text; - if (usage) { - tokenMetrics.low.input += usage.promptTokens; - tokenMetrics.low.output += usage.completionTokens; - tokenMetrics.low.total += usage.totalTokens; - } - }, undefined, 'low'); + await makeModelCall( + false, + messages, + (text, _model, usage) => { + responseText = text; + if (usage) { + tokenMetrics.low.input += usage.promptTokens; + tokenMetrics.low.output += usage.completionTokens; + tokenMetrics.low.total += usage.totalTokens; + } + }, + undefined, + "low", + ); try { // Extract the JSON response from the output tags @@ -1134,90 +1180,6 @@ export class KnowledgeGraphService { return { resolvedStatements, invalidatedStatements }; } - /** - * Add attributes to entity nodes based on the resolved statements - */ - private async addAttributesToEntities( - triples: Triple[], - episode: EpisodicNode, - tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } }, - ): Promise { - // Collect all unique entities from the triples - const entityMap = new Map(); - - // Add all subjects, predicates, and objects to the map - triples.forEach((triple) => { - if (triple.subject) { - entityMap.set(triple.subject.uuid, triple.subject); - } - if (triple.predicate) { - entityMap.set(triple.predicate.uuid, triple.predicate); - } - if (triple.object) { - entityMap.set(triple.object.uuid, triple.object); - } - }); - - // Convert the map to an array of entities - const entities = Array.from(entityMap.values()); - - if (entities.length === 0) { - return triples; // No entities to process - } - - // Prepare simplified context for the LLM - const context = { - episodeContent: episode.content, - entities: entities.map((entity) => ({ - uuid: entity.uuid, - name: entity.name, - currentAttributes: entity.attributes || {}, - })), - }; - - // Create a prompt for the LLM to extract attributes - const messages = extractAttributes(context); - - let responseText = ""; - - // Attribute extraction is LOW complexity (simple key-value extraction) - await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => { - responseText = text; - if (usage) { - tokenMetrics.low.input += usage.promptTokens; - tokenMetrics.low.output += usage.completionTokens; - tokenMetrics.low.total += usage.totalTokens; - } - }, undefined, 'low'); - - try { - const outputMatch = responseText.match(/([\s\S]*?)<\/output>/); - if (outputMatch && outputMatch[1]) { - responseText = outputMatch[1].trim(); - } - // Parse the LLM response - const responseData = JSON.parse(responseText); - const updatedEntities = responseData.entities || []; - - // Update entity attributes and save them - for (const updatedEntity of updatedEntities) { - const entity = entityMap.get(updatedEntity.uuid); - if (entity) { - // Merge the existing attributes with the new ones - entity.attributes = { - ...updatedEntity.attributes, - }; - } - } - - logger.info(`Updated attributes for ${updatedEntities.length} entities`); - } catch (error) { - logger.error("Error processing entity attributes", { error }); - } - - return triples; - } - /** * Normalize an episode by extracting entities and creating nodes and statements */ @@ -1226,7 +1188,10 @@ export class KnowledgeGraphService { source: string, userId: string, prisma: PrismaClient, - tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } }, + tokenMetrics: { + high: { input: number; output: number; total: number }; + low: { input: number; output: number; total: number }; + }, episodeTimestamp?: Date, sessionContext?: string, contentType?: EpisodeType, @@ -1263,14 +1228,20 @@ export class KnowledgeGraphService { : normalizePrompt(context); // Normalization is LOW complexity (text cleaning and standardization) let responseText = ""; - await makeModelCall(false, messages, (text, _model, usage) => { - responseText = text; - if (usage) { - tokenMetrics.low.input += usage.promptTokens; - tokenMetrics.low.output += usage.completionTokens; - tokenMetrics.low.total += usage.totalTokens; - } - }, undefined, 'high'); + await makeModelCall( + false, + messages, + (text, _model, usage) => { + responseText = text; + if (usage) { + tokenMetrics.low.input += usage.promptTokens; + tokenMetrics.low.output += usage.completionTokens; + tokenMetrics.low.total += usage.totalTokens; + } + }, + undefined, + "high", + ); let normalizedEpisodeBody = ""; const outputMatch = responseText.match(/([\s\S]*?)<\/output>/); if (outputMatch && outputMatch[1]) { diff --git a/apps/webapp/app/services/prompts/nodes.ts b/apps/webapp/app/services/prompts/nodes.ts index 020f622..fd8a865 100644 --- a/apps/webapp/app/services/prompts/nodes.ts +++ b/apps/webapp/app/services/prompts/nodes.ts @@ -8,8 +8,8 @@ import { type CoreMessage } from "ai"; * Extract entities from content using unified approach (works for both conversations and documents) */ export const extractEntities = ( - context: Record, - extractionMode: 'conversation' | 'document' = 'conversation' + context: Record, + extractionMode: "conversation" | "document" = "conversation", ): CoreMessage[] => { const sysPrompt = `You are an AI assistant that extracts entity nodes from conversational messages for a reified knowledge graph. Your primary task is to extract all significant entities mentioned in the conversation, treating both concrete entities and type/concept entities as first-class nodes. @@ -204,13 +204,18 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr 4. End with exactly: 5. NO additional text, NO comments, NO explanations`; - const contentLabel = extractionMode === 'conversation' ? 'CURRENT EPISODE' : 'TEXT'; + const contentLabel = + extractionMode === "conversation" ? "CURRENT EPISODE" : "TEXT"; const userPrompt = ` -${extractionMode === 'conversation' ? ` +${ + extractionMode === "conversation" + ? ` ${JSON.stringify(context.previousEpisodes || [], null, 2)} -` : ''}<${contentLabel}> +` + : "" +}<${contentLabel}> ${context.episodeContent} @@ -222,7 +227,6 @@ ${context.episodeContent} ]; }; - /** * Resolve entity duplications */ @@ -325,63 +329,3 @@ ${JSON.stringify(context.extracted_nodes, null, 2)} }, ]; }; - -export const extractAttributes = ( - context: Record, -): CoreMessage[] => { - const sysPrompt = ` -You are an AI assistant that extracts and enhances entity attributes based on context. -Your task is to analyze entities and provide appropriate attribute values based on available information. - -For each entity: -1. Analyze the context to identify relevant attributes for the entity -2. Extract appropriate values from the episode content if available -3. Focus on factual, descriptive attributes rather than type classifications -4. Give empty attributes object ({}) when there are no attributes to update -5. Only include attributes that you're adding or modifying -6. I'll merge your new attributes with existing ones, so only provide updates - -Common attribute types to consider: -- Descriptive properties (color, size, status, etc.) -- Relational context (role, position, relationship, etc.) -- Temporal information (duration, frequency, etc.) -- Qualitative aspects (importance, preference, etc.) - -## CRITICAL OUTPUT FORMAT REQUIREMENTS: - -**YOU MUST STRICTLY FOLLOW THESE FORMAT RULES:** -1. **ALWAYS use tags** - Never use any other tag format -2. **ONLY output valid JSON** within the tags -3. **NO additional text** before or after the tags -4. **NO comments** inside the JSON -5. **REQUIRED structure:** Must follow exact JSON schema shown below - - -{ - "entities": [ - { - "uuid": "entity-uuid", - "attributes": { - "attributeName1": "value1", - "attributeName2": "value2" - } - } - ] -} -`; - - const userPrompt = ` - -${JSON.stringify(context.entities, null, 2)} - - - -${context.episodeContent} - - -Based on the above information, please extract and enhance attributes for each entity based on the context. Return only the uuid and updated attributes for each entity.`; - return [ - { role: "system", content: sysPrompt }, - { role: "user", content: userPrompt }, - ]; -};