refactor: make entity handling type-free and simplify entity resolution in knowledge graph

This commit is contained in:
Manoj 2025-09-09 20:00:50 +05:30 committed by Harshith Mullapudi
parent 6ddcab873a
commit 952386ca0e
5 changed files with 151 additions and 465 deletions

View File

@ -2,19 +2,9 @@ import type { EntityNode } from "@core/types";
import { runQuery } from "~/lib/neo4j.server";
export async function saveEntity(entity: EntityNode): Promise<string> {
// Debug: Log entity to identify missing typeEmbedding
if (!entity.typeEmbedding) {
console.error(`Entity missing typeEmbedding:`, {
uuid: entity.uuid,
name: entity.name,
type: entity.type,
hasNameEmbedding: !!entity.nameEmbedding,
});
throw new Error(
`Entity ${entity.name} (${entity.type}) is missing typeEmbedding`,
);
}
// Build query conditionally based on whether typeEmbedding exists
const hasTypeEmbedding = entity.typeEmbedding && entity.typeEmbedding.length > 0;
const query = `
MERGE (n:Entity {uuid: $uuid})
ON CREATE SET
@ -22,7 +12,7 @@ export async function saveEntity(entity: EntityNode): Promise<string> {
n.type = $type,
n.attributes = $attributes,
n.nameEmbedding = $nameEmbedding,
n.typeEmbedding = $typeEmbedding,
${hasTypeEmbedding ? 'n.typeEmbedding = $typeEmbedding,' : ''}
n.createdAt = $createdAt,
n.userId = $userId,
n.space = $space
@ -31,23 +21,27 @@ export async function saveEntity(entity: EntityNode): Promise<string> {
n.type = $type,
n.attributes = $attributes,
n.nameEmbedding = $nameEmbedding,
n.typeEmbedding = $typeEmbedding,
${hasTypeEmbedding ? 'n.typeEmbedding = $typeEmbedding,' : ''}
n.space = $space
RETURN n.uuid as uuid
`;
const params = {
const params: any = {
uuid: entity.uuid,
name: entity.name,
type: entity.type,
type: entity.type || "",
attributes: JSON.stringify(entity.attributes || {}),
nameEmbedding: entity.nameEmbedding,
typeEmbedding: entity.typeEmbedding,
createdAt: entity.createdAt.toISOString(),
userId: entity.userId,
space: entity.space || null,
};
// Add typeEmbedding to params only if it exists
if (hasTypeEmbedding) {
params.typeEmbedding = entity.typeEmbedding;
}
const result = await runQuery(query, params);
return result[0].get("uuid");
}
@ -65,10 +59,10 @@ export async function getEntity(uuid: string): Promise<EntityNode | null> {
return {
uuid: entity.uuid,
name: entity.name,
type: entity.type,
type: entity.type || null,
attributes: JSON.parse(entity.attributes || "{}"),
nameEmbedding: entity.nameEmbedding,
typeEmbedding: entity.typeEmbedding,
typeEmbedding: entity.typeEmbedding || null,
createdAt: new Date(entity.createdAt),
userId: entity.userId,
space: entity.space,

View File

@ -15,8 +15,7 @@ import crypto from "crypto";
import {
dedupeNodes,
extractAttributes,
extractMessage,
extractText,
extractEntities,
} from "./prompts/nodes";
import {
extractStatements,
@ -25,14 +24,11 @@ import {
import {
getEpisodeStatements,
getRecentEpisodes,
getRelatedEpisodesEntities,
searchEpisodesByEmbedding,
} from "./graphModels/episode";
import {
findExactPredicateMatches,
findSimilarEntities,
findSimilarEntitiesWithSameType,
replaceEntityReferences,
} from "./graphModels/entity";
import {
findContradictoryStatements,
@ -47,9 +43,7 @@ import { getEmbedding, makeModelCall } from "~/lib/model.server";
import { runQuery } from "~/lib/neo4j.server";
import {
Apps,
getNodeTypes,
getNodeTypesString,
isPresetType,
} from "~/utils/presets/nodes";
import { normalizePrompt, normalizeDocumentPrompt } from "./prompts";
import { type PrismaClient } from "@prisma/client";
@ -272,8 +266,8 @@ export class KnowledgeGraphService {
params.type,
);
const normalizedTime = Date.now() - startTime;
logger.log(`Normalized episode body in ${normalizedTime} ms`);
const normalizedTime = Date.now();
logger.log(`Normalized episode body in ${normalizedTime - startTime} ms`);
if (normalizedEpisodeBody === "NOTHING_TO_REMEMBER") {
logger.log("Nothing to remember");
@ -284,15 +278,6 @@ export class KnowledgeGraphService {
};
}
const relatedEpisodesEntities = await getRelatedEpisodesEntities({
embedding: await this.getEmbedding(normalizedEpisodeBody),
userId: params.userId,
minSimilarity: 0.7,
});
const relatedTime = Date.now() - normalizedTime;
logger.log(`Related episodes entities in ${relatedTime} ms`);
// Step 2: Episode Creation - Create or retrieve the episode
const episode: EpisodicNode = {
uuid: crypto.randomUUID(),
@ -316,23 +301,18 @@ export class KnowledgeGraphService {
);
const extractedTime = Date.now();
logger.log(`Extracted entities in ${extractedTime - relatedTime} ms`);
logger.log(`Extracted entities in ${extractedTime - normalizedTime} ms`);
// Step 3.1: Context-aware entity resolution with preset type evolution
await this.resolveEntitiesWithContext(
extractedNodes,
relatedEpisodesEntities,
);
// Step 3.2: Handle preset type logic - expand entities for statement extraction
const categorizedEntities = await this.expandEntitiesForStatements(
extractedNodes,
episode,
);
// Step 3.1: Simple entity categorization (no type-based expansion needed)
const categorizedEntities = {
primary: extractedNodes,
expanded: [], // No expansion needed with type-free approach
};
const expandedTime = Date.now();
logger.log(`Expanded entities in ${expandedTime - extractedTime} ms`);
logger.log(`Processed entities in ${expandedTime - extractedTime} ms`);
console.log(extractedNodes.map((e) => e.name));
// Step 4: Statement Extraction - Extract statements (triples) instead of direct edges
const extractedStatements = await this.extractStatements(
episode,
@ -371,10 +351,12 @@ export class KnowledgeGraphService {
);
// Step 7: Add attributes to entity nodes
const updatedTriples = await this.addAttributesToEntities(
resolvedStatements,
episode,
);
// const updatedTriples = await this.addAttributesToEntities(
// resolvedStatements,
// episode,
// );
const updatedTriples = resolvedStatements;
const updatedTriplesTime = Date.now();
logger.log(
@ -439,12 +421,6 @@ export class KnowledgeGraphService {
episode: EpisodicNode,
previousEpisodes: EpisodicNode[],
): Promise<EntityNode[]> {
// Get all app keys
const allAppEnumValues = Object.values(Apps);
// Get all node types
const entityTypes = getNodeTypes(allAppEnumValues);
// Use the prompt library to get the appropriate prompts
const context = {
episodeContent: episode.content,
@ -452,13 +428,11 @@ export class KnowledgeGraphService {
content: ep.content,
createdAt: ep.createdAt.toISOString(),
})),
entityTypes: entityTypes,
};
// Get the extract_json prompt from the prompt library
const messages = episode.sessionId
? extractMessage(context)
: extractText(context);
// Get the unified entity extraction prompt
const extractionMode = episode.sessionId ? 'conversation' : 'document';
const messages = extractEntities(context, extractionMode);
let responseText = "";
@ -474,21 +448,19 @@ export class KnowledgeGraphService {
responseText = outputMatch[1].trim();
const extractedEntities = JSON.parse(responseText || "{}").entities || [];
// Batch generate embeddings for better performance
// Batch generate embeddings for entity names
const entityNames = extractedEntities.map((entity: any) => entity.name);
const entityTypes = extractedEntities.map((entity: any) => entity.type);
const [nameEmbeddings, typeEmbeddings] = await Promise.all([
Promise.all(entityNames.map((name: string) => this.getEmbedding(name))),
Promise.all(entityTypes.map((type: string) => this.getEmbedding(type))),
]);
const nameEmbeddings = await Promise.all(
entityNames.map((name: string) => this.getEmbedding(name))
);
entities = extractedEntities.map((entity: any, index: number) => ({
uuid: crypto.randomUUID(),
name: entity.name,
type: entity.type,
type: undefined, // Type will be inferred from statements
attributes: entity.attributes || {},
nameEmbedding: nameEmbeddings[index],
typeEmbedding: typeEmbeddings[index],
typeEmbedding: undefined, // No type embedding needed
createdAt: new Date(),
userId: episode.userId,
}));
@ -537,6 +509,8 @@ export class KnowledgeGraphService {
responseText = text;
});
console.log(responseText);
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
if (outputMatch && outputMatch[1]) {
responseText = outputMatch[1].trim();
@ -548,6 +522,8 @@ export class KnowledgeGraphService {
const extractedTriples: ExtractedTripleData[] =
JSON.parse(responseText || "{}").edges || [];
console.log(`extracted triples length: ${extractedTriples.length}`)
// Create maps to deduplicate entities by name within this extraction
const predicateMap = new Map<string, EntityNode>();
@ -597,17 +573,13 @@ export class KnowledgeGraphService {
// Convert extracted triples to Triple objects with Statement nodes
const triples = extractedTriples.map(
(triple: ExtractedTripleData, tripleIndex: number) => {
// Find the subject and object nodes by matching both name and type
// Find the subject and object nodes by matching name (type-free approach)
const subjectNode = allEntities.find(
(node) =>
node.name.toLowerCase() === triple.source.toLowerCase() &&
node.type.toLowerCase() === triple.sourceType.toLowerCase(),
(node) => node.name.toLowerCase() === triple.source.toLowerCase()
);
const objectNode = allEntities.find(
(node) =>
node.name.toLowerCase() === triple.target.toLowerCase() &&
node.type.toLowerCase() === triple.targetType.toLowerCase(),
(node) => node.name.toLowerCase() === triple.target.toLowerCase()
);
// Get the deduplicated predicate node
@ -661,108 +633,7 @@ export class KnowledgeGraphService {
return triples.filter(Boolean) as Triple[];
}
/**
* Expand entities for statement extraction by adding existing preset entities
*/
private async expandEntitiesForStatements(
extractedNodes: EntityNode[],
episode: EpisodicNode,
): Promise<{
primary: EntityNode[];
expanded: EntityNode[];
}> {
const allAppEnumValues = Object.values(Apps);
const expandedEntities: EntityNode[] = [];
// For each extracted entity, check if we need to add existing preset entities
for (const entity of extractedNodes) {
const newIsPreset = isPresetType(entity.type, allAppEnumValues);
// Find similar entities with same name
const similarEntities = await findSimilarEntities({
queryEmbedding: entity.nameEmbedding,
limit: 5,
threshold: 0.8,
userId: episode.userId,
});
for (const existingEntity of similarEntities) {
const existingIsPreset = isPresetType(
existingEntity.type,
allAppEnumValues,
);
// If both are preset types, include both for statement extraction
if (newIsPreset && existingIsPreset) {
// Add the existing entity to the list if not already present
if (!expandedEntities.some((e) => e.uuid === existingEntity.uuid)) {
expandedEntities.push(existingEntity);
}
}
}
}
// Deduplicate by name AND type combination
const deduplicateEntities = (entities: EntityNode[]) => {
const seen = new Map<string, EntityNode>();
return entities.filter((entity) => {
const key = `${entity.name.toLowerCase()}_${entity.type.toLowerCase()}`;
if (seen.has(key)) {
return false;
}
seen.set(key, entity);
return true;
});
};
return {
primary: deduplicateEntities(extractedNodes),
expanded: deduplicateEntities(
expandedEntities.filter(
(e) => !extractedNodes.some((primary) => primary.uuid === e.uuid),
),
),
};
}
/**
* Resolve entities with context-aware deduplication and preset type evolution
* Only merges entities that appear in semantically related episodes
*/
private async resolveEntitiesWithContext(
extractedNodes: EntityNode[],
relatedEpisodesEntities: EntityNode[],
): Promise<void> {
const allAppEnumValues = Object.values(Apps);
extractedNodes.map(async (newEntity) => {
// Find same-name entities in related episodes (contextually relevant)
const sameNameInContext = relatedEpisodesEntities.filter(
(existing) =>
existing.name.toLowerCase() === newEntity.name.toLowerCase(),
);
if (sameNameInContext.length > 0) {
let existingEntityIds: string[] = [];
sameNameInContext.forEach(async (existingEntity) => {
const newIsPreset = isPresetType(newEntity.type, allAppEnumValues);
const existingIsPreset = isPresetType(
existingEntity.type,
allAppEnumValues,
);
if (newIsPreset && !existingIsPreset) {
// New is preset, existing is custom - evolve existing entity to preset type
existingEntityIds.push(existingEntity.uuid);
}
});
if (existingEntityIds.length > 0) {
await replaceEntityReferences(newEntity, existingEntityIds);
}
}
});
}
/**
* Resolve extracted nodes to existing nodes or create new ones
@ -835,9 +706,8 @@ export class KnowledgeGraphService {
// Step 2a: Find similar entities for non-predicate entities
const similarEntitiesResults = await Promise.all(
nonPredicates.map(async (entity) => {
const similarEntities = await findSimilarEntitiesWithSameType({
const similarEntities = await findSimilarEntities({
queryEmbedding: entity.nameEmbedding,
entityType: entity.type,
limit: 5,
threshold: 0.7,
userId: episode.userId,
@ -1240,20 +1110,12 @@ export class KnowledgeGraphService {
return triples; // No entities to process
}
// Get all app keys
const allAppEnumValues = Object.values(Apps);
// Get all node types with their attribute definitions
const entityTypes = getNodeTypes(allAppEnumValues);
// Prepare simplified context for the LLM
const context = {
episodeContent: episode.content,
entityTypes: entityTypes,
entities: entities.map((entity) => ({
uuid: entity.uuid,
name: entity.name,
type: entity.type,
currentAttributes: entity.attributes || {},
})),
};

View File

@ -5,16 +5,20 @@
import { type CoreMessage } from "ai";
/**
* Extract entities from an episode using message-based approach
* Extract entities from content using unified approach (works for both conversations and documents)
*/
export const extractMessage = (context: Record<string, any>): CoreMessage[] => {
export const extractEntities = (
context: Record<string, any>,
extractionMode: 'conversation' | 'document' = 'conversation'
): CoreMessage[] => {
const sysPrompt = `You are an AI assistant that extracts entity nodes from conversational messages for a reified knowledge graph.
Your primary task is to extract and classify significant entities mentioned in the conversation.
Your primary task is to extract all significant entities mentioned in the conversation, treating both concrete entities and type/concept entities as first-class nodes.
In a reified knowledge graph, we need to identify subject and object entities that will be connected through statements.
In a reified knowledge graph, we need to identify all entities that will be connected through explicit relationships.
Focus on extracting:
1. Subject entities (people, objects, concepts)
2. Object entities (people, objects, concepts)
1. Concrete entities (people, objects, specific instances)
2. Type/concept entities (categories, classes, abstract concepts)
3. All entities that participate in "X is a Y" relationships
Instructions:
@ -22,16 +26,16 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
1. **Entity Identification**:
- Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the CURRENT EPISODE.
- For identity statements like "I am X" or "I'm X", extract BOTH the pronoun ("I") as a Alias entity AND the named entity (X).
- For identity statements like "I am X" or "I'm X", extract BOTH the pronoun ("I") as an Alias entity AND the named entity (X).
- **ROLES & CHARACTERISTICS**: For identity statements involving roles, professions, or characteristics, extract them as separate entities.
- For pronouns that refer to named entities, extract them as separate Alias entities.
- **TYPE/CONCEPT ENTITIES**: When text contains "X is a Y" statements, extract BOTH X and Y as separate entities.
2. **Entity Classification**:
- Prefer using appropriate types from the ENTITY_TYPES section when they fit naturally.
- DO NOT force-fit entities into inappropriate types from ENTITY_TYPES.
- If no type from ENTITY_TYPES fits naturally, create a descriptive type based on context (e.g., "memory_graph_system", "authentication_bug").
- Each entity should have exactly ONE type that best describes what it is.
- Classify pronouns (I, me, you, etc.) as "Alias" entities.
2. **Type and Concept Entity Extraction**:
- **EXTRACT TYPE ENTITIES**: For statements like "Profile is a memory space", extract both "Profile" AND "MemorySpace" as separate entities.
- **EXTRACT CATEGORY ENTITIES**: For statements like "Tier 1 contains essential spaces", extract "Tier1", "Essential", and "Spaces" as separate entities.
- **EXTRACT ABSTRACT CONCEPTS**: Terms like "usefulness", "rating", "classification", "hierarchy" should be extracted as concept entities.
- **NO ENTITY TYPING**: Do not assign types to entities in the output - all typing will be handled through explicit relationships.
3. **Exclusions**:
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
@ -40,13 +44,13 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm").
4. **Entity Name Extraction**:
- Extract ONLY the core entity name, WITHOUT any type descriptors or qualifiers
- When text mentions "Tesla car", extract name as "Tesla" with type "Vehicle"
- When text mentions "John's company", extract name as "John" with type "Person" (company is a separate entity)
- **CLEAN NAMES**: Remove type words like "app", "system", "platform", "tool", "service", "company", "organization" from the entity name
- **PRONOUNS**: Use exact form as they appear (e.g., "I", "me", "you") and classify as "Alias"
- Extract ONLY the core entity name, WITHOUT any descriptors or qualifiers
- When text mentions "Tesla car", extract TWO entities: "Tesla" AND "Car"
- When text mentions "memory space system", extract "Memory", "Space", AND "System" as separate entities
- **CLEAN NAMES**: Remove articles (a, an, the) and quantifiers, but preserve the core concept
- **PRONOUNS**: Use exact form as they appear (e.g., "I", "me", "you")
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
- **NO TYPE SUFFIXES**: Never append the entity type to the entity name
- **CONCEPT NORMALIZATION**: Convert to singular form where appropriate ("spaces" → "Space")
5. **Temporal and Relationship Context Extraction**:
- EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years")
@ -57,6 +61,19 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
## Examples of Correct Entity Extraction:
**TYPE/CONCEPT ENTITY EXTRACTION:**
**EXTRACT BOTH ENTITIES IN "IS A" RELATIONSHIPS:**
- Text: "Profile is a memory space" → Extract: "Profile" AND "MemorySpace"
- Text: "Tesla is a car" → Extract: "Tesla" AND "Car"
- Text: "John is a teacher" → Extract: "John" AND "Teacher"
- Text: "Goals space connects to Projects" → Extract: "Goals", "Space", AND "Projects"
**EXTRACT CONCEPT ENTITIES:**
- Text: "rated 10/10 for usefulness" → Extract: "Usefulness", "Rating"
- Text: "essential classification tier" → Extract: "Essential", "Classification", "Tier"
- Text: "hierarchical memory system" → Extract: "Hierarchical", "Memory", "System"
**TEMPORAL INFORMATION - What to EXTRACT vs EXCLUDE:**
**EXTRACT - Relationship Temporal Information:**
@ -73,47 +90,50 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- Text: "next week" → Don't extract "next week"
**RELATIONSHIP CONTEXT ENTITIES:**
- Text: "my close friends" → Extract: "close friends" (QualifiedGroup)
- Text: "strong support system" → Extract: "support system" (RelationshipType)
- Text: "work colleagues" → Extract: "work colleagues" (ProfessionalGroup)
- Text: "family members" → Extract: "family members" (FamilyGroup)
- Text: "my close friends" → Extract: "Close Friends" (QualifiedGroup)
- Text: "strong support system" → Extract: "Support System" (RelationshipType)
- Text: "work colleagues" → Extract: "Work Colleagues" (ProfessionalGroup)
- Text: "family members" → Extract: "Family Members" (FamilyGroup)
**STANDARD ENTITY EXTRACTION:**
- Text: "Tesla car" → Name: "Tesla", Type: "Vehicle"
- Text: "Google's search engine" → Name: "Google", Type: "Company" + Name: "Search Engine", Type: "Product"
- Text: "Microsoft Office suite" → Name: "Microsoft Office", Type: "Software"
- Text: "John's startup company" → Name: "John", Type: "Person" + Name: "Startup", Type: "Company"
- Text: "Tesla car" → Extract: "Tesla" AND "Car"
- Text: "Google's search engine" → Extract: "Google" AND "Search Engine"
- Text: "Microsoft Office suite" → Extract: "Microsoft Office" AND "Suite"
- Text: "John's startup company" → Extract: "John", "Startup", AND "Company"
**INCORRECT Examples:**
- Text: "Tesla car" → Name: "Tesla car", Type: "Vehicle"
- Text: "authentication system" → Name: "authentication system", Type: "System"
- Text: "payment service" → Name: "payment service", Type: "Service"
**CORRECT vs INCORRECT Examples:**
**CORRECT:**
- Text: "Profile is a memory space" → Extract: "Profile", "MemorySpace"
- Text: "essential classification system" → Extract: "Essential", "Classification", "System"
- Text: "10/10 usefulness rating" → Extract: "Usefulness", "Rating"
**INCORRECT:**
- Text: "Profile is a memory space" → Only extract: "Profile"
- Text: "authentication system" → Extract: "authentication system" (should be "Authentication", "System")
- Text: "payment service" → Extract: "payment service" (should be "Payment", "Service")
Format your response as a JSON object with the following structure:
<output>
{
"entities": [
{
"name": "Entity Name",
"type": "Entity Type",
"name": "Entity Name"
}
// Additional entities...
]
}
</output>`;
const contentLabel = extractionMode === 'conversation' ? 'CURRENT EPISODE' : 'TEXT';
const userPrompt = `
<PREVIOUS EPISODES>
${extractionMode === 'conversation' ? `<PREVIOUS EPISODES>
${JSON.stringify(context.previousEpisodes || [], null, 2)}
</PREVIOUS EPISODES>
<CURRENT EPISODE>
` : ''}<${contentLabel}>
${context.episodeContent}
</CURRENT EPISODE>
<ENTITY_TYPES>
${JSON.stringify(context.entityTypes || {}, null, 2)}
</ENTITY_TYPES>
</${contentLabel}>
`;
@ -123,161 +143,6 @@ ${JSON.stringify(context.entityTypes || {}, null, 2)}
];
};
/**
* Extract entities from text-based content
*/
export const extractText = (context: Record<string, any>): CoreMessage[] => {
const sysPrompt = `
You are an AI assistant that extracts entity nodes from text for a reified knowledge graph.
Your primary task is to extract and classify significant entities mentioned in the provided text.
In a reified knowledge graph, we need to identify subject and object entities that will be connected through statements.
Focus on extracting:
1. Subject entities
2. Object entities
Instructions:
You are given a TEXT. Your task is to extract **entity nodes** mentioned **explicitly or implicitly** in the TEXT.
1. **Entity Identification**:
- Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the TEXT.
- For identity statements like "I am X" or "I'm X", extract BOTH the pronoun ("I") as a Alias entity AND the named entity (X).
- **ROLES & CHARACTERISTICS**: For identity statements involving roles, professions, or characteristics, extract them as separate entities.
- For pronouns that refer to named entities, extract them as separate Alias entities.
2. **Entity Classification**:
- Prefer using appropriate types from the ENTITY_TYPES section when they fit naturally.
- DO NOT force-fit entities into inappropriate types from ENTITY_TYPES.
- If no type from ENTITY_TYPES fits naturally, create a descriptive type based on context.
- Each entity should have exactly ONE type that best describes what it is.
- Classify pronouns (I, me, you, etc.) as "Alias" entities.
3. **Exclusions**:
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
- **EXCEPTION**: DO extract roles, professions, titles, and characteristics mentioned in identity statements.
- Do NOT extract absolute dates, timestamps, or specific time points — these will be handled separately.
- Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm").
4. **Entity Name Extraction**:
- Extract ONLY the core entity name, WITHOUT any type descriptors or qualifiers
- When text mentions "Tesla car", extract name as "Tesla" with type "Vehicle"
- When text mentions "John's company", extract name as "John" with type "Person" (company is a separate entity)
- **CLEAN NAMES**: Remove type words like "app", "system", "platform", "tool", "service", "company", "organization" from the entity name
- **PRONOUNS**: Use exact form as they appear (e.g., "I", "me", "you") and classify as "Alias"
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
- **NO TYPE SUFFIXES**: Never append the entity type to the entity name
5. **Temporal and Relationship Context Extraction**:
- EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years")
- EXTRACT temporal context that anchors relationships ("since moving", "after graduation", "during college")
- EXTRACT relationship qualifiers ("close friends", "support system", "work team", "family members")
- DO NOT extract absolute dates, timestamps, or specific time points ("June 9, 2023", "3pm", "last Saturday")
- DO NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday")
## Examples of Correct Entity Extraction:
**TEMPORAL INFORMATION - What to EXTRACT vs EXCLUDE:**
**EXTRACT - Relationship Temporal Information:**
- Text: "I've known these friends for 4 years" → Extract: "4 years" (Duration)
- Text: "since I moved from my home country" → Extract: "since moving" (TemporalContext)
- Text: "after that tough breakup" → Extract: "after breakup" (TemporalContext)
- Text: "we've been married for 5 years" → Extract: "5 years" (Duration)
- Text: "during college" → Extract: "during college" (TemporalContext)
**EXCLUDE - Absolute Dates/Times:**
- Text: "on June 9, 2023" → Don't extract "June 9, 2023"
- Text: "last Saturday" → Don't extract "last Saturday"
- Text: "at 3pm yesterday" → Don't extract "3pm" or "yesterday"
- Text: "next week" → Don't extract "next week"
**RELATIONSHIP CONTEXT ENTITIES:**
- Text: "my close friends" → Extract: "close friends" (QualifiedGroup)
- Text: "strong support system" → Extract: "support system" (RelationshipType)
- Text: "work colleagues" → Extract: "work colleagues" (ProfessionalGroup)
- Text: "family members" → Extract: "family members" (FamilyGroup)
**STANDARD ENTITY EXTRACTION:**
- Text: "Tesla car" → Name: "Tesla", Type: "Vehicle"
- Text: "Google's search engine" → Name: "Google", Type: "Company" + Name: "Search Engine", Type: "Product"
- Text: "Microsoft Office suite" → Name: "Microsoft Office", Type: "Software"
- Text: "John's startup company" → Name: "John", Type: "Person" + Name: "Startup", Type: "Company"
**INCORRECT Examples:**
- Text: "Tesla car" → Name: "Tesla car", Type: "Vehicle"
- Text: "authentication system" → Name: "authentication system", Type: "System"
- Text: "payment service" → Name: "payment service", Type: "Service"
Format your response as a JSON object with the following structure:
<output>
{
"entities": [
{
"name": "Entity Name",
"type": "Entity Type"
}
// Additional entities...
]
}
</output>`;
const userPrompt = `
<TEXT>
${context.episodeContent}
</TEXT>
<ENTITY_TYPES>
${JSON.stringify(context.entityTypes || {}, null, 2)}
</ENTITY_TYPES>
`;
return [
{ role: "system", content: sysPrompt },
{ role: "user", content: userPrompt },
];
};
/**
* Extract entities from an episode using JSON-based approach
*/
export const extractJson = (context: Record<string, any>): CoreMessage[] => {
const sysPrompt = `You are an AI assistant that extracts entity nodes from text.
Your primary task is to extract and classify significant entities mentioned in the content.`;
const userPrompt = `
<PREVIOUS EPISODES>
${JSON.stringify(context.previousEpisodes || [], null, 2)}
</PREVIOUS EPISODES>
<CURRENT EPISODE>
${context.episodeContent}
</CURRENT EPISODE>
<ENTITY TYPES>
${JSON.stringify(context.entityTypes || {}, null, 2)}
</ENTITY TYPES>
Instructions:
Extract all significant entities mentioned in the CURRENT EPISODE. For each entity, provide a name and type.
Respond with a JSON object containing an "entities" array of objects, each with "name" and "type" properties.
Guidelines:
1. Extract significant entities, concepts, or actors mentioned in the content.
2. Avoid creating nodes for relationships or actions.
3. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
4. **CLEAN ENTITY NAMES**: Extract ONLY the core entity name WITHOUT type descriptors:
- "Tesla car" → Name: "Tesla", Type: "Vehicle"
- Remove words like "app", "system", "platform", "tool", "service", "company" from entity names
5. Use full names when available and avoid abbreviations.
${context.customPrompt || ""}
`;
return [
{ role: "system", content: sysPrompt },
{ role: "user", content: userPrompt },
];
};
/**
* Resolve entity duplications
@ -286,84 +151,53 @@ export const dedupeNodes = (context: Record<string, any>): CoreMessage[] => {
return [
{
role: "system",
content: `You are a helpful assistant who determines whether or not ENTITIES extracted from a conversation are duplicates of existing entities.
content: `You are a helpful assistant who determines whether extracted entities are duplicates of existing entities.
## CRITICAL RULE: Entity Type Matters
DO NOT mark entities with different types as duplicates, even if they have identical names.
- DO NOT mark "John" (Person) and "John" (Company) as duplicates
- DO NOT mark "Apple" (Company) and "Apple" (Fruit) as duplicates
- DO NOT mark "Core" (App) and "Core" (Concept) as duplicates
Consider entities as potential duplicates ONLY if they have:
1. Similar or identical names AND
2. The EXACT SAME entity type
Focus on name-based similarity and contextual meaning to identify duplicates.
Each entity in ENTITIES is represented as a JSON object with the following structure:
{
id: integer id of the entity,
name: "name of the entity",
entity_type: "ontological classification of the entity",
entity_type_description: "Description of what the entity type represents",
duplication_candidates: [
{
idx: integer index of the candidate entity,
name: "name of the candidate entity",
entity_type: "ontological classification of the candidate entity",
...<additional attributes>
}
]
}
## Duplication Decision Rules
For each entity, determine if it is a duplicate of any of its duplication candidates:
## Duplication Decision Framework
### MARK AS DUPLICATE (duplicate_idx >= 0) when:
- Verify the candidate has the SAME entity_type as the current entity
- AND confirm the entities refer to the same real-world object or concept
- AND check that the names are very similar or identical
### SPECIAL RULE FOR PREDICATES:
**ALWAYS mark identical predicates as duplicates** - predicates are universal and reusable:
- Mark "is associated with" (Predicate) vs "is associated with" (Predicate) → duplicate_idx = 0
- Mark "works for" (Predicate) vs "works for" (Predicate) → duplicate_idx = 0
- Mark "owns" (Predicate) vs "owns" (Predicate) → duplicate_idx = 0
- **IDENTICAL NAMES**: Exact same name or obvious synonyms
- **SEMANTIC EQUIVALENCE**: Different names but clearly referring to the same entity
- **STRUCTURAL VARIATIONS**: Same entity with minor formatting differences
### DO NOT mark as duplicate (duplicate_idx = -1) when:
- Confirm the candidate has a DIFFERENT entity_type (even with identical names)
- Identify they are related but distinct entities
- Recognize they have similar names or purposes but refer to separate instances or concepts
- Distinguish when one is a general concept and the other is a specific instance
- **EXCEPTION**: DO NOT apply this rule to Predicates - always deduplicate identical predicates
- **DIFFERENT INSTANCES**: Similar names but different real-world entities
- **CONTEXTUAL DISTINCTION**: Same name but different contexts suggest distinct entities
- **HIERARCHICAL RELATIONSHIPS**: One is part of/contains the other
## Examples:
## Example Patterns:
**CORRECT - Mark as NOT Duplicates (Different Types):**
- Set "Tesla" (Company) vs "Tesla" (Car) → duplicate_idx = -1
- Set "Apple" (Company) vs "Apple" (Fruit) → duplicate_idx = -1
- Set "Core" (App) vs "Core" (System) → duplicate_idx = -1
**DUPLICATE CASES:**
- "John Smith" vs "John Smith" → Check context for same person
- "Microsoft" vs "Microsoft Corporation" → Same organization (duplicate_idx = 0)
- "iPhone" vs "Apple iPhone" → Same product (duplicate_idx = 0)
- "Tier 1" vs "Tier 1" → Same classification level (duplicate_idx = 0)
**CORRECT - Mark Predicates AS Duplicates (Same Name, Same Type):**
- Set "is associated with" (Predicate) vs "is associated with" (Predicate) → duplicate_idx = 0
- Set "works for" (Predicate) vs "works for" (Predicate) → duplicate_idx = 0
- Set "owns" (Predicate) vs "owns" (Predicate) → duplicate_idx = 0
**NOT DUPLICATE CASES:**
- "Meeting Room A" vs "Meeting Room B" → Different rooms (duplicate_idx = -1)
- "Project Alpha" vs "Project Beta" → Different projects (duplicate_idx = -1)
- "Essential" vs "Critical" → Different priority levels (duplicate_idx = -1)
- "Team Lead" vs "Team Member" → Different roles (duplicate_idx = -1)
**CORRECT - Evaluate Potential Duplicates (Same Type):**
- Check if "John Smith" (Person) vs "John Smith" (Person) refer to same person
- Check if "Microsoft" (Company) vs "Microsoft Corporation" (Company) are the same company
- Check if "iPhone" (Product) vs "Apple iPhone" (Product) are the same product
**CORRECT - Mark as NOT Duplicates (Same Type, Different Instances):**
- Set "Meeting" (Event) vs "Meeting" (Event) → duplicate_idx = -1 (different meetings)
- Set "Project" (Task) vs "Project" (Task) → duplicate_idx = -1 (different projects)
- **NOTE**: DO NOT apply this rule to Predicates - always deduplicate identical predicates
## Task:
Provide your response as a JSON object with an "entity_resolutions" array containing one entry for each entity.
For each entity, include:
- "id": the id of the entity (integer)
- "name": the name of the entity (string)
- "duplicate_idx": the index of the duplicate candidate, or -1 if no duplicate (integer)
## Decision Guidelines:
- **CONSERVATIVE APPROACH**: When uncertain, prefer NOT marking as duplicate
- **CONTEXT MATTERS**: Consider the episode content and previous episodes
- **SEMANTIC MEANING**: Focus on whether they refer to the same real-world entity
Format your response as follows:
<output>
@ -380,12 +214,9 @@ Format your response as follows:
</output>
## Important Instructions:
- FIRST check if entity types match before considering any duplication
- If entity types don't match, immediately set duplicate_idx = -1
- Only mark entities with identical types as potential duplicates
- When in doubt, prefer NOT marking as duplicate (duplicate_idx = -1)
- Always include all entities from the input in your response
- Always wrap the output in these tags <output> </output>
- When in doubt, prefer NOT marking as duplicate (duplicate_idx = -1)
`,
},
{
@ -412,16 +243,21 @@ export const extractAttributes = (
): CoreMessage[] => {
const sysPrompt = `
You are an AI assistant that extracts and enhances entity attributes based on context.
Your task is to analyze entities and provide appropriate attribute values for each entity based on its type definition.
Your task is to analyze entities and provide appropriate attribute values based on available information.
For each entity:
1. Look at its type and identify the required and optional attributes from the entity type definitions
2. Check if the entity already has values for these attributes
3. For missing attributes, extract appropriate values from the context if possible
4. For existing attributes, enhance or correct them if needed based on the context
5. Give empty attributes object ({}) when there are no attributes to update
6. Only include attributes that you're updating - don't repeat existing attributes that don't need changes
7. I'll merge your new attributes with the current attributes, so only provide values that should be added or modified
1. Analyze the context to identify relevant attributes for the entity
2. Extract appropriate values from the episode content if available
3. Focus on factual, descriptive attributes rather than type classifications
4. Give empty attributes object ({}) when there are no attributes to update
5. Only include attributes that you're adding or modifying
6. I'll merge your new attributes with existing ones, so only provide updates
Common attribute types to consider:
- Descriptive properties (color, size, status, etc.)
- Relational context (role, position, relationship, etc.)
- Temporal information (duration, frequency, etc.)
- Qualitative aspects (importance, preference, etc.)
Provide your output in this structure:
<output>
@ -441,10 +277,6 @@ Provide your output in this structure:
</output>`;
const userPrompt = `
<ENTITY_TYPES>
${JSON.stringify(context.entityTypes, null, 2)}
</ENTITY_TYPES>
<ENTITIES>
${JSON.stringify(context.entities, null, 2)}
</ENTITIES>
@ -453,7 +285,7 @@ ${JSON.stringify(context.entities, null, 2)}
${context.episodeContent}
</EPISODE_CONTENT>
Based on the above information, please extract and enhance attributes for each entity according to its type definition. Return only the uuid and updated attributes for each entity.`;
Based on the above information, please extract and enhance attributes for each entity based on the context. Return only the uuid and updated attributes for each entity.`;
return [
{ role: "system", content: sysPrompt },
{ role: "user", content: userPrompt },

View File

@ -132,10 +132,8 @@ Format your response as a JSON object with the following structure:
"edges": [
{
"source": "[Subject Entity Name - MUST be from AVAILABLE ENTITIES]",
"sourceType": "[Source Entity Type]",
"predicate": "[Relationship Type]",
"target": "[Object Entity Name - MUST be from AVAILABLE ENTITIES]",
"targetType": "[Target Entity Type]",
"fact": "[Natural language representation of the fact]",
"attributes": {
"confidence": confidence of the fact,

View File

@ -48,10 +48,10 @@ export interface EpisodicNode {
export interface EntityNode {
uuid: string;
name: string;
type: string; // Single type - either from presets or custom
type?: string; // Optional type - can be inferred from statements
attributes: Record<string, any>;
nameEmbedding: number[];
typeEmbedding: number[];
typeEmbedding?: number[]; // Optional since type is optional
createdAt: Date;
userId: string;
space?: string;
@ -123,10 +123,10 @@ export type AddEpisodeResult = {
export interface ExtractedTripleData {
source: string;
sourceType: string;
sourceType?: string; // Optional - can be inferred from statements
predicate: string;
target: string;
targetType: string;
targetType?: string; // Optional - can be inferred from statements
fact: string;
attributes?: Record<string, any>;
}