diff --git a/apps/webapp/app/services/documentChunker.server.ts b/apps/webapp/app/services/documentChunker.server.ts
index 63cc99f..fc87a29 100644
--- a/apps/webapp/app/services/documentChunker.server.ts
+++ b/apps/webapp/app/services/documentChunker.server.ts
@@ -23,12 +23,12 @@ export interface ChunkedDocument {
 
 /**
  * Document chunking service that splits large documents into semantic chunks
- * Targets 10-15k tokens per chunk with natural paragraph boundaries
+ * Targets ~3k tokens per chunk (1k-5k bounds) for better entity extraction with natural paragraph boundaries
  */
 export class DocumentChunker {
-  private readonly TARGET_CHUNK_SIZE = 12500; // Middle of 10-15k range
-  private readonly MIN_CHUNK_SIZE = 10000;
-  private readonly MAX_CHUNK_SIZE = 15000;
+  private readonly TARGET_CHUNK_SIZE = 3000; // Much smaller for better entity extraction
+  private readonly MIN_CHUNK_SIZE = 1000;
+  private readonly MAX_CHUNK_SIZE = 5000;
   private readonly MIN_PARAGRAPH_SIZE = 100; // Minimum tokens for a paragraph to be considered
 
   /**
diff --git a/apps/webapp/app/services/mcp.server.ts b/apps/webapp/app/services/mcp.server.ts
index e7365fd..38ae66a 100644
--- a/apps/webapp/app/services/mcp.server.ts
+++ b/apps/webapp/app/services/mcp.server.ts
@@ -61,7 +61,7 @@ async function createMcpServer(
     const { name, arguments: args } = request.params;
 
     // Handle memory tools
-    if (name.startsWith("memory_")) {
+    if (name.startsWith("memory_") || name === "get_user_profile") {
       return await callMemoryTool(name, args, userId, source);
     }
 
diff --git a/apps/webapp/app/services/prompts/nodes.ts b/apps/webapp/app/services/prompts/nodes.ts
index 036cdea..1937eee 100644
--- a/apps/webapp/app/services/prompts/nodes.ts
+++ b/apps/webapp/app/services/prompts/nodes.ts
@@ -23,6 +23,7 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
 
 1.
**Entity Identification**: - Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the CURRENT EPISODE. - For identity statements like "I am X" or "I'm X", extract BOTH the pronoun ("I") as a Alias entity AND the named entity (X). + - **ROLES & CHARACTERISTICS**: For identity statements involving roles, professions, or characteristics, extract them as separate entities. - For pronouns that refer to named entities, extract them as separate Alias entities. 2. **Entity Classification**: @@ -34,6 +35,7 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr 3. **Exclusions**: - Do NOT extract entities representing relationships or actions (predicates will be handled separately). + - **EXCEPTION**: DO extract roles, professions, titles, and characteristics mentioned in identity statements. - Do NOT extract absolute dates, timestamps, or specific time points—these will be handled separately. - Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm"). @@ -141,6 +143,7 @@ You are given a TEXT. Your task is to extract **entity nodes** mentioned **expli 1. **Entity Identification**: - Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the TEXT. - For identity statements like "I am X" or "I'm X", extract BOTH the pronoun ("I") as a Alias entity AND the named entity (X). + - **ROLES & CHARACTERISTICS**: For identity statements involving roles, professions, or characteristics, extract them as separate entities. - For pronouns that refer to named entities, extract them as separate Alias entities. 2. **Entity Classification**: @@ -152,6 +155,7 @@ You are given a TEXT. Your task is to extract **entity nodes** mentioned **expli 3. **Exclusions**: - Do NOT extract entities representing relationships or actions (predicates will be handled separately). 
+ - **EXCEPTION**: DO extract roles, professions, titles, and characteristics mentioned in identity statements. - Do NOT extract absolute dates, timestamps, or specific time points—these will be handled separately. - Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm"). diff --git a/apps/webapp/app/services/prompts/statements.ts b/apps/webapp/app/services/prompts/statements.ts index 497d14a..ed88236 100644 --- a/apps/webapp/app/services/prompts/statements.ts +++ b/apps/webapp/app/services/prompts/statements.ts @@ -52,15 +52,19 @@ Follow these instructions: - predicate: The relationship type (can be a descriptive phrase) - target: The object entity (MUST be from AVAILABLE ENTITIES) -EXTRACT NEW MEANINGFUL RELATIONSHIPS: +EXTRACT NEW MEANINGFUL RELATIONSHIPS AND CHARACTERISTICS: - Extract meaningful relationships between available entities that are NOT already captured in previous episodes +- Extract individual entity characteristics, roles, and properties as standalone facts - Use predicates that accurately describe new relationships between entities - Be creative but precise in identifying NEW relationships - focus on value-adding connections - **HIGHEST PRIORITY**: Entities with identical names but different types MUST be connected with explicit relationship statements - **MANDATORY**: When you find entities like "John (Person)" and "John (Company)", create explicit relationships such as "John" "owns" "John" or "John" "founded" "John" +- **ROLE/CHARACTERISTIC EXTRACTION**: Always extract roles, professions, titles, and key characteristics as separate statements - Look for both explicit and implicit NEW relationships mentioned in the text - **FILTER OUT**: Relationships already established in previous episodes unless they represent updates or changes - Common relationship types include (but are not limited to): + * **Roles and professions** (e.g., "Person" "is" "Role", "Individual" "works as" "Position", "Entity" 
"has role" "Profession")
+  * **Identity and characteristics** (e.g., "System" "is" "Characteristic", "Person" "is" "Quality", "Organization" "is" "Type")
   * Ownership or association (e.g., "Alice" "owns" "Restaurant")
   * Participation or attendance (e.g., "Team" "participates in" "Tournament")
   * Personal connections (e.g., "Sarah" "works with" "Michael")
@@ -157,10 +161,11 @@ IMPORTANT RULES:
 - **OUTPUT FORMAT**: Always wrap output in tags
 
 Example of CORRECT usage:
-If AVAILABLE ENTITIES contains ["John", "Max", "Wedding", "John (Company)"], you can create:
-- "John" "attends" "Wedding" ✓ (if not already in previous episodes)
-- "Max" "married to" "Tina" with timespan attribute ✓ (if new relationship)
-- "John" "founded" "John (Company)" ✓ (PRIORITY: same name, different types)
+If AVAILABLE ENTITIES contains ["Person", "Individual", "Event", "Organization", "Role"], you can create:
+- "Person" "is" "Role" ✓ (PRIORITY: role/characteristic extraction)
+- "Person" "attends" "Event" ✓ (if not already in previous episodes)
+- "Individual" "married to" "Person" with timespan attribute ✓ (if new relationship)
+- "Person" "founded" "Organization" ✓ (PRIORITY: same name, different types when applicable)
 
 Example of CORRECT Duration/TemporalContext usage:
 If AVAILABLE ENTITIES contains ["Caroline", "friends", "4 years", "since moving", "breakup"]:
diff --git a/apps/webapp/app/services/space.server.ts b/apps/webapp/app/services/space.server.ts
index 58a7a44..a08ff7c 100644
--- a/apps/webapp/app/services/space.server.ts
+++ b/apps/webapp/app/services/space.server.ts
@@ -111,6 +111,33 @@ export class SpaceService {
     });
   }
 
+  async getSpaceByName(name: string, userId: string) {
+    const user = await prisma.user.findFirst({
+      where: {
+        id: userId,
+      },
+      include: {
+        Workspace: true,
+      },
+    });
+
+    // Guard: if the user or their workspace is missing, `workspaceId` would be
+    // undefined and Prisma silently drops undefined filters, which would match
+    // a space with this name from ANY workspace. Bail out instead.
+    if (!user?.Workspace) {
+      return null;
+    }
+
+    const space = await prisma.space.findFirst({
+      where: {
+        name: name,
+        workspaceId: user.Workspace.id,
+      },
+    });
+
+    return space;
+  }
+
   /**
    * Get a specific space by ID
    */
diff --git
a/apps/webapp/app/trigger/spaces/space-summary.ts b/apps/webapp/app/trigger/spaces/space-summary.ts index bdafa2d..4968c14 100644 --- a/apps/webapp/app/trigger/spaces/space-summary.ts +++ b/apps/webapp/app/trigger/spaces/space-summary.ts @@ -352,6 +352,11 @@ function createUnifiedSummaryPrompt( previousSummary: string | null, previousThemes: string[], ): CoreMessage[] { + // If there are no statements and no previous summary, we cannot generate a meaningful summary + if (statements.length === 0 && previousSummary === null) { + throw new Error("Cannot generate summary without statements or existing summary"); + } + const statementsText = statements .map( (stmt) => @@ -378,6 +383,13 @@ function createUnifiedSummaryPrompt( role: "system", content: `You are an expert at analyzing and summarizing structured knowledge within semantic spaces. Your task is to ${isUpdate ? "update an existing summary by integrating new statements" : "create a comprehensive summary of statements"}. +CRITICAL RULES: +1. Base your summary ONLY on insights derived from the actual facts/statements provided +2. Use the space description only as contextual guidance, never copy or paraphrase it +3. Write in a factual, neutral tone - avoid promotional language ("pivotal", "invaluable", "cutting-edge") +4. Be specific and concrete - reference actual entities, relationships, and patterns found in the data +5. If statements are insufficient for meaningful insights, state that more data is needed + INSTRUCTIONS: ${ isUpdate @@ -415,7 +427,7 @@ Provide your response inside tags with valid JSON. The summary { - "summary": "${isUpdate ? "Updated HTML summary that integrates new insights with existing knowledge through identified connections. Use HTML tags like

, , ,