fix: remove attribute logic

Harshith Mullapudi 2025-10-20 12:18:14 +05:30
parent 95636f96a8
commit 1db2628af4
2 changed files with 128 additions and 213 deletions
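
Most of the diff below is mechanical reformatting: multi-argument makeModelCall call sites are broken onto one argument per line, and inline tokenMetrics type annotations are wrapped. The substantive change is the removal of the attribute-extraction pass: the addAttributesToEntities method in the service file and the extractAttributes prompt in the prompts module (evidently ./prompts/nodes, judging by the import at the top of the first file). As a reading aid, here is a sketch of the makeModelCall signature implied by these call sites; the parameter names are assumptions, only the arity and argument shapes come from the code.

import { type CoreMessage } from "ai";

// Inferred shape only; names are hypothetical. The real declaration
// lives in ~/lib/model.server.
interface TokenUsage {
  promptTokens: number;
  completionTokens: number;
  totalTokens: number;
}

declare function makeModelCall(
  stream: boolean, // always false at these call sites
  messages: CoreMessage[], // prompt built by the prompt library
  onText: (text: string, model: unknown, usage?: TokenUsage) => void,
  schema?: unknown, // always undefined here
  complexity?: "high" | "low", // routes the call to a stronger or cheaper model
): Promise<void>;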

View File

@@ -12,11 +12,7 @@ import {
import { logger } from "./logger.service";
import { ClusteringService } from "./clustering.server";
import crypto from "crypto";
import {
dedupeNodes,
extractAttributes,
extractEntities,
} from "./prompts/nodes";
import { dedupeNodes, extractEntities } from "./prompts/nodes";
import {
extractStatements,
extractStatementsOSS,
@@ -40,7 +36,11 @@ import {
saveTriple,
searchStatementsByEmbedding,
} from "./graphModels/statement";
import { getEmbedding, makeModelCall, isProprietaryModel } from "~/lib/model.server";
import {
getEmbedding,
makeModelCall,
isProprietaryModel,
} from "~/lib/model.server";
import { runQuery } from "~/lib/neo4j.server";
import { Apps, getNodeTypesString } from "~/utils/presets/nodes";
import { normalizePrompt, normalizeDocumentPrompt } from "./prompts";
@@ -419,8 +419,8 @@ export class KnowledgeGraphService {
logger.log(`Processing time: ${processingTimeMs} ms`);
// Count only truly new statements (exclude duplicates)
const newStatementsCount = updatedTriples.filter(triple =>
triple.statement.createdAt >= episode.createdAt
const newStatementsCount = updatedTriples.filter(
(triple) => triple.statement.createdAt >= episode.createdAt,
).length;
return {
@@ -442,7 +442,10 @@ export class KnowledgeGraphService {
private async extractEntities(
episode: EpisodicNode,
previousEpisodes: EpisodicNode[],
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
tokenMetrics: {
high: { input: number; output: number; total: number };
low: { input: number; output: number; total: number };
},
): Promise<EntityNode[]> {
// Use the prompt library to get the appropriate prompts
const context = {
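
Each private step now threads the same tokenMetrics accumulator through its signature. A caller would initialize it with zeroed counters, along these lines (a sketch; only the field shape is taken from the signature above):

// Hypothetical initializer matching the tokenMetrics parameter shape.
const tokenMetrics = {
  high: { input: 0, output: 0, total: 0 }, // HIGH-complexity calls: entity and statement extraction
  low: { input: 0, output: 0, total: 0 }, // LOW-complexity calls: dedupe, resolution, normalization
};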
@@ -460,14 +463,20 @@ export class KnowledgeGraphService {
let responseText = "";
// Entity extraction requires HIGH complexity (creative reasoning, nuanced NER)
await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.high.input += usage.promptTokens;
tokenMetrics.high.output += usage.completionTokens;
tokenMetrics.high.total += usage.totalTokens;
}
}, undefined, 'high');
await makeModelCall(
false,
messages as CoreMessage[],
(text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.high.input += usage.promptTokens;
tokenMetrics.high.output += usage.completionTokens;
tokenMetrics.high.total += usage.totalTokens;
}
},
undefined,
"high",
);
// Convert to EntityNode objects
let entities: EntityNode[] = [];
@@ -478,19 +487,23 @@ export class KnowledgeGraphService {
responseText = outputMatch[1].trim();
const parsedResponse = JSON.parse(responseText || "[]");
// Handle both old format {entities: [...]} and new format [...]
const extractedEntities = Array.isArray(parsedResponse) ? parsedResponse : (parsedResponse.entities || []);
const extractedEntities = Array.isArray(parsedResponse)
? parsedResponse
: parsedResponse.entities || [];
// Batch generate embeddings for entity names
const entityNames = Array.isArray(extractedEntities[0]) ? extractedEntities : extractedEntities.map((entity: any) => entity.name || entity);
const entityNames = Array.isArray(extractedEntities[0])
? extractedEntities
: extractedEntities.map((entity: any) => entity.name || entity);
const nameEmbeddings = await Promise.all(
entityNames.map((name: string) => this.getEmbedding(name)),
);
entities = extractedEntities.map((entity: any, index: number) => ({
uuid: crypto.randomUUID(),
name: typeof entity === 'string' ? entity : entity.name,
name: typeof entity === "string" ? entity : entity.name,
type: undefined, // Type will be inferred from statements
attributes: typeof entity === 'string' ? {} : (entity.attributes || {}),
attributes: typeof entity === "string" ? {} : entity.attributes || {},
nameEmbedding: nameEmbeddings[index],
typeEmbedding: undefined, // No type embedding needed
createdAt: new Date(),
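
The branching above accepts three historical response shapes: the old { entities: [...] } wrapper, a bare array of entity objects, and a bare array of name strings. A standalone illustration with invented sample data:

// Illustrative only; the sample payloads are invented.
const samples: any[] = [
  { entities: [{ name: "Ada", attributes: { role: "engineer" } }] }, // old wrapper format
  [{ name: "Ada", attributes: { role: "engineer" } }], // new bare-array format
  ["Ada"], // bare names; attributes default to {}
];

for (const parsed of samples) {
  const extracted = Array.isArray(parsed) ? parsed : (parsed.entities ?? []);
  const names = extracted.map((e: any) => (typeof e === "string" ? e : e.name));
  console.log(names); // ["Ada"] in all three cases
}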
@@ -512,7 +525,10 @@ export class KnowledgeGraphService {
expanded: EntityNode[];
},
previousEpisodes: EpisodicNode[],
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
tokenMetrics: {
high: { input: number; output: number; total: number };
low: { input: number; output: number; total: number };
},
): Promise<Triple[]> {
// Use the prompt library to get the appropriate prompts
const context = {
@@ -534,22 +550,28 @@ export class KnowledgeGraphService {
referenceTime: episode.validAt.toISOString(),
};
console.log("proprietary model", isProprietaryModel(undefined, 'high'));
console.log("proprietary model", isProprietaryModel(undefined, "high"));
// Statement extraction requires HIGH complexity (causal reasoning, emotional context)
// Choose between proprietary and OSS prompts based on model type
const messages = isProprietaryModel(undefined, 'high')
const messages = isProprietaryModel(undefined, "high")
? extractStatements(context)
: extractStatementsOSS(context);
let responseText = "";
await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.high.input += usage.promptTokens;
tokenMetrics.high.output += usage.completionTokens;
tokenMetrics.high.total += usage.totalTokens;
}
}, undefined, 'high');
await makeModelCall(
false,
messages as CoreMessage[],
(text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.high.input += usage.promptTokens;
tokenMetrics.high.output += usage.completionTokens;
tokenMetrics.high.total += usage.totalTokens;
}
},
undefined,
"high",
);
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
if (outputMatch && outputMatch[1]) {
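
isProprietaryModel(undefined, "high") decides whether the proprietary or the OSS statement-extraction prompt is used; its implementation is not shown in this diff. A plausible sketch, offered purely as an assumption about what such a helper might look like:

// Hypothetical sketch; the real helper is imported from ~/lib/model.server.
const PROPRIETARY_PREFIXES = ["gpt-", "claude-", "gemini-"]; // assumed list

function isProprietaryModelSketch(
  model?: string,
  complexity: "high" | "low" = "low",
): boolean {
  // Assumption: with no explicit model, fall back to an env-configured model per tier.
  const resolved =
    model ??
    (complexity === "high" ? process.env.MODEL_HIGH : process.env.MODEL_LOW) ??
    "";
  return PROPRIETARY_PREFIXES.some((prefix) => resolved.startsWith(prefix));
}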
@@ -561,9 +583,11 @@ export class KnowledgeGraphService {
// Parse the statements from the LLM response
const parsedResponse = JSON.parse(responseText || "[]");
// Handle both old format {"edges": [...]} and new format [...]
const extractedTriples: ExtractedTripleData[] = Array.isArray(parsedResponse)
const extractedTriples: ExtractedTripleData[] = Array.isArray(
parsedResponse,
)
? parsedResponse
: (parsedResponse.edges || []);
: parsedResponse.edges || [];
console.log(`extracted triples length: ${extractedTriples.length}`);
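
The <output> tag regex above recurs at every call site in this file. A self-contained demonstration on a made-up model response:

// The response text here is invented for illustration.
const sampleResponse = `Some preamble the parser should skip...
<output>[{"source": "Ada", "predicate": "works_at", "target": "Initech"}]</output>`;

const match = sampleResponse.match(/<output>([\s\S]*?)<\/output>/);
const parsedTriples = JSON.parse(match?.[1]?.trim() ?? "[]");
console.log(parsedTriples.length); // 1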
@@ -683,7 +707,10 @@ export class KnowledgeGraphService {
triples: Triple[],
episode: EpisodicNode,
previousEpisodes: EpisodicNode[],
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
tokenMetrics: {
high: { input: number; output: number; total: number };
low: { input: number; output: number; total: number };
},
): Promise<Triple[]> {
// Step 1: Extract unique entities from triples
const uniqueEntitiesMap = new Map<string, EntityNode>();
@@ -810,14 +837,20 @@ export class KnowledgeGraphService {
let responseText = "";
// Entity deduplication is LOW complexity (pattern matching, similarity comparison)
await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
}, undefined, 'low');
await makeModelCall(
false,
messages as CoreMessage[],
(text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
},
undefined,
"low",
);
// Step 5: Process LLM response
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
@@ -898,7 +931,10 @@ export class KnowledgeGraphService {
triples: Triple[],
episode: EpisodicNode,
previousEpisodes: EpisodicNode[],
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
tokenMetrics: {
high: { input: number; output: number; total: number };
low: { input: number; output: number; total: number };
},
): Promise<{
resolvedStatements: Triple[];
invalidatedStatements: string[];
@@ -911,7 +947,10 @@ export class KnowledgeGraphService {
}
// Step 1: Collect all potential matches for all triples at once
const allPotentialMatches: Map<string, Omit<StatementNode, "factEmbedding">[]> = new Map();
const allPotentialMatches: Map<
string,
Omit<StatementNode, "factEmbedding">[]
> = new Map();
const allExistingTripleData: Map<string, Triple> = new Map();
// For preparing the LLM context
@@ -971,7 +1010,8 @@ export class KnowledgeGraphService {
}
// Phase 3: Check related memories for contradictory statements
const previousEpisodesStatements: Omit<StatementNode, "factEmbedding">[] = [];
const previousEpisodesStatements: Omit<StatementNode, "factEmbedding">[] =
[];
await Promise.all(
previousEpisodes.map(async (episode) => {
@@ -1052,14 +1092,20 @@ export class KnowledgeGraphService {
let responseText = "";
// Statement resolution is LOW complexity (rule-based duplicate/contradiction detection)
await makeModelCall(false, messages, (text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
}, undefined, 'low');
await makeModelCall(
false,
messages,
(text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
},
undefined,
"low",
);
try {
// Extract the JSON response from the output tags
@@ -1134,90 +1180,6 @@ export class KnowledgeGraphService {
return { resolvedStatements, invalidatedStatements };
}
/**
* Add attributes to entity nodes based on the resolved statements
*/
private async addAttributesToEntities(
triples: Triple[],
episode: EpisodicNode,
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
): Promise<Triple[]> {
// Collect all unique entities from the triples
const entityMap = new Map<string, EntityNode>();
// Add all subjects, predicates, and objects to the map
triples.forEach((triple) => {
if (triple.subject) {
entityMap.set(triple.subject.uuid, triple.subject);
}
if (triple.predicate) {
entityMap.set(triple.predicate.uuid, triple.predicate);
}
if (triple.object) {
entityMap.set(triple.object.uuid, triple.object);
}
});
// Convert the map to an array of entities
const entities = Array.from(entityMap.values());
if (entities.length === 0) {
return triples; // No entities to process
}
// Prepare simplified context for the LLM
const context = {
episodeContent: episode.content,
entities: entities.map((entity) => ({
uuid: entity.uuid,
name: entity.name,
currentAttributes: entity.attributes || {},
})),
};
// Create a prompt for the LLM to extract attributes
const messages = extractAttributes(context);
let responseText = "";
// Attribute extraction is LOW complexity (simple key-value extraction)
await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
}, undefined, 'low');
try {
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
if (outputMatch && outputMatch[1]) {
responseText = outputMatch[1].trim();
}
// Parse the LLM response
const responseData = JSON.parse(responseText);
const updatedEntities = responseData.entities || [];
// Update entity attributes and save them
for (const updatedEntity of updatedEntities) {
const entity = entityMap.get(updatedEntity.uuid);
if (entity) {
// Merge the existing attributes with the new ones
entity.attributes = {
...updatedEntity.attributes,
};
}
}
logger.info(`Updated attributes for ${updatedEntities.length} entities`);
} catch (error) {
logger.error("Error processing entity attributes", { error });
}
return triples;
}
/**
* Normalize an episode by extracting entities and creating nodes and statements
*/
@@ -1226,7 +1188,10 @@ export class KnowledgeGraphService {
source: string,
userId: string,
prisma: PrismaClient,
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
tokenMetrics: {
high: { input: number; output: number; total: number };
low: { input: number; output: number; total: number };
},
episodeTimestamp?: Date,
sessionContext?: string,
contentType?: EpisodeType,
@@ -1263,14 +1228,20 @@ export class KnowledgeGraphService {
: normalizePrompt(context);
// Normalization is LOW complexity (text cleaning and standardization)
let responseText = "";
await makeModelCall(false, messages, (text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
}, undefined, 'high');
await makeModelCall(
false,
messages,
(text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
},
undefined,
"high",
);
let normalizedEpisodeBody = "";
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
if (outputMatch && outputMatch[1]) {

View File

@@ -8,8 +8,8 @@ import { type CoreMessage } from "ai";
* Extract entities from content using unified approach (works for both conversations and documents)
*/
export const extractEntities = (
context: Record<string, any>,
extractionMode: 'conversation' | 'document' = 'conversation'
context: Record<string, any>,
extractionMode: "conversation" | "document" = "conversation",
): CoreMessage[] => {
const sysPrompt = `You are an AI assistant that extracts entity nodes from conversational messages for a reified knowledge graph.
Your primary task is to extract all significant entities mentioned in the conversation, treating both concrete entities and type/concept entities as first-class nodes.
@@ -204,13 +204,18 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
4. End with exactly: </output>
5. NO additional text, NO comments, NO explanations`;
const contentLabel = extractionMode === 'conversation' ? 'CURRENT EPISODE' : 'TEXT';
const contentLabel =
extractionMode === "conversation" ? "CURRENT EPISODE" : "TEXT";
const userPrompt = `
${extractionMode === 'conversation' ? `<PREVIOUS EPISODES>
${
extractionMode === "conversation"
? `<PREVIOUS EPISODES>
${JSON.stringify(context.previousEpisodes || [], null, 2)}
</PREVIOUS EPISODES>
` : ''}<${contentLabel}>
`
: ""
}<${contentLabel}>
${context.episodeContent}
</${contentLabel}>
@@ -222,7 +227,6 @@ ${context.episodeContent}
];
};
/**
* Resolve entity duplications
*/
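
With the reformatted signature, the second parameter defaults to "conversation", and passing "document" drops the <PREVIOUS EPISODES> block and relabels the content tag. A usage sketch with invented field values:

// Hypothetical call; the context values are invented.
const documentMessages = extractEntities(
  {
    episodeContent: "Ada joined Initech as a staff engineer.",
    previousEpisodes: [],
  },
  "document", // content is wrapped in <TEXT> instead of <CURRENT EPISODE>
);
// documentMessages is a CoreMessage[] ready to hand to makeModelCall.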
@@ -325,63 +329,3 @@ ${JSON.stringify(context.extracted_nodes, null, 2)}
},
];
};
export const extractAttributes = (
context: Record<string, any>,
): CoreMessage[] => {
const sysPrompt = `
You are an AI assistant that extracts and enhances entity attributes based on context.
Your task is to analyze entities and provide appropriate attribute values based on available information.
For each entity:
1. Analyze the context to identify relevant attributes for the entity
2. Extract appropriate values from the episode content if available
3. Focus on factual, descriptive attributes rather than type classifications
4. Give empty attributes object ({}) when there are no attributes to update
5. Only include attributes that you're adding or modifying
6. I'll merge your new attributes with existing ones, so only provide updates
Common attribute types to consider:
- Descriptive properties (color, size, status, etc.)
- Relational context (role, position, relationship, etc.)
- Temporal information (duration, frequency, etc.)
- Qualitative aspects (importance, preference, etc.)
## CRITICAL OUTPUT FORMAT REQUIREMENTS:
**YOU MUST STRICTLY FOLLOW THESE FORMAT RULES:**
1. **ALWAYS use <output> tags** - Never use any other tag format
2. **ONLY output valid JSON** within the <output> tags
3. **NO additional text** before or after the <output> tags
4. **NO comments** inside the JSON
5. **REQUIRED structure:** Must follow exact JSON schema shown below
<output>
{
"entities": [
{
"uuid": "entity-uuid",
"attributes": {
"attributeName1": "value1",
"attributeName2": "value2"
}
}
]
}
</output>`;
const userPrompt = `
<ENTITIES>
${JSON.stringify(context.entities, null, 2)}
</ENTITIES>
<EPISODE_CONTENT>
${context.episodeContent}
</EPISODE_CONTENT>
Based on the above information, please extract and enhance attributes for each entity based on the context. Return only the uuid and updated attributes for each entity.`;
return [
{ role: "system", content: sysPrompt },
{ role: "user", content: userPrompt },
];
};
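
The system prompt above promises the model that its new attributes will be merged with existing ones. A minimal sketch of that merge contract, assuming flat string-keyed attribute records; the helper below is illustrative, not part of the codebase:

// Hypothetical merge consistent with the prompt's stated contract.
type Attributes = Record<string, string>;

function mergeAttributes(existing: Attributes, updates: Attributes): Attributes {
  return { ...existing, ...updates }; // updates win; untouched keys survive
}

console.log(mergeAttributes({ role: "engineer" }, { status: "active" }));
// { role: "engineer", status: "active" }

Note that the removed addAttributesToEntities assigned { ...updatedEntity.attributes } directly, so in practice existing attributes were replaced rather than merged.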