diff --git a/apps/webapp/app/routes/api.v1.spaces.$spaceId.episodes.ts b/apps/webapp/app/routes/api.v1.spaces.$spaceId.episodes.ts new file mode 100644 index 0000000..07238bc --- /dev/null +++ b/apps/webapp/app/routes/api.v1.spaces.$spaceId.episodes.ts @@ -0,0 +1,49 @@ +import { z } from "zod"; +import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { SpaceService } from "~/services/space.server"; +import { json } from "@remix-run/node"; +import { getSpaceEpisodeCount } from "~/services/graphModels/space"; + +const spaceService = new SpaceService(); + +// Schema for space ID parameter +const SpaceParamsSchema = z.object({ + spaceId: z.string(), +}); + +const { loader } = createActionApiRoute( + { + params: SpaceParamsSchema, + allowJWT: true, + authorization: { + action: "search", + }, + corsStrategy: "all", + }, + async ({ authentication, params }) => { + const userId = authentication.userId; + const { spaceId } = params; + + // Verify space exists and belongs to user + const space = await spaceService.getSpace(spaceId, userId); + if (!space) { + return json({ error: "Space not found" }, { status: 404 }); + } + + // Get episodes in the space + const episodes = await spaceService.getSpaceEpisodes(spaceId, userId); + const episodeCount = await getSpaceEpisodeCount(spaceId, userId); + + return json({ + episodes, + space: { + uuid: space.uuid, + name: space.name, + description: space.description, + episodeCount, + } + }); + } +); + +export { loader }; diff --git a/apps/webapp/app/routes/api.v1.spaces.$spaceId.reset.ts b/apps/webapp/app/routes/api.v1.spaces.$spaceId.reset.ts index 32e67af..ba3d93c 100644 --- a/apps/webapp/app/routes/api.v1.spaces.$spaceId.reset.ts +++ b/apps/webapp/app/routes/api.v1.spaces.$spaceId.reset.ts @@ -1,16 +1,7 @@ import { z } from "zod"; -import { - createActionApiRoute, - createHybridActionApiRoute, -} from "~/services/routeBuilders/apiBuilder.server"; +import { createHybridActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; import { SpaceService } from "~/services/space.server"; import { json } from "@remix-run/node"; -import { - createSpace, - deleteSpace, - updateSpace, -} from "~/services/graphModels/space"; -import { prisma } from "~/db.server"; import { logger } from "~/services/logger.service"; import { triggerSpaceAssignment } from "~/trigger/spaces/space-assignment"; @@ -33,45 +24,26 @@ const { loader, action } = createHybridActionApiRoute( const { spaceId } = params; const spaceService = new SpaceService(); - // Verify space exists and belongs to user - const space = await prisma.space.findUnique({ - where: { - id: spaceId, - }, - }); - if (!space) { - return json({ error: "Space not found" }, { status: 404 }); - } + // Reset the space (clears all assignments, summary, and metadata) + const space = await spaceService.resetSpace(spaceId, userId); - // Get statements in the space - await deleteSpace(spaceId, userId); + logger.info(`Reset space ${space.id} successfully`); - await createSpace( - space.id, - space.name.trim(), - space.description?.trim(), - userId, - ); - - await spaceService.updateSpace(space.id, { status: "pending" }, userId); - - logger.info(`Created space ${space.id} successfully`); - - // Trigger automatic LLM assignment for the new space + // Trigger automatic episode assignment for the reset space try { await triggerSpaceAssignment({ userId: userId, workspaceId: space.workspaceId, mode: "new_space", newSpaceId: space.id, - batchSize: 25, // Analyze recent statements for the new space + batchSize: 20, // Analyze recent episodes for reassignment }); - logger.info(`Triggered LLM space assignment for new space ${space.id}`); + logger.info(`Triggered space assignment for reset space ${space.id}`); } catch (error) { - // Don't fail space creation if LLM assignment fails + // Don't fail space reset if assignment fails logger.warn( - `Failed to trigger LLM assignment for space ${space.id}:`, + `Failed to trigger assignment for space ${space.id}:`, error as Record, ); } diff --git a/apps/webapp/app/routes/api.v1.spaces.$spaceId.statements.ts b/apps/webapp/app/routes/api.v1.spaces.$spaceId.statements.ts index ac07f16..7f6caa9 100644 --- a/apps/webapp/app/routes/api.v1.spaces.$spaceId.statements.ts +++ b/apps/webapp/app/routes/api.v1.spaces.$spaceId.statements.ts @@ -32,7 +32,10 @@ const { loader } = createActionApiRoute( // Get statements in the space const statements = await spaceService.getSpaceStatements(spaceId, userId); - return json({ + return json({ + deprecated: true, + deprecationMessage: "This endpoint is deprecated. Use /api/v1/spaces/{spaceId}/episodes instead. Spaces now work with episodes directly.", + newEndpoint: `/api/v1/spaces/${spaceId}/episodes`, statements, space: { uuid: space.uuid, diff --git a/apps/webapp/app/services/graphModels/space.ts b/apps/webapp/app/services/graphModels/space.ts index 9af063e..d48b0f5 100644 --- a/apps/webapp/app/services/graphModels/space.ts +++ b/apps/webapp/app/services/graphModels/space.ts @@ -56,13 +56,12 @@ export async function getSpace( const query = ` MATCH (s:Space {uuid: $spaceId, userId: $userId}) WHERE s.isActive = true - - // Count statements in this space using optimized approach - OPTIONAL MATCH (stmt:Statement {userId: $userId}) - WHERE stmt.spaceIds IS NOT NULL AND $spaceId IN stmt.spaceIds AND stmt.invalidAt IS NULL - - WITH s, count(stmt) as statementCount - RETURN s, statementCount + + // Count episodes assigned to this space using direct relationship + OPTIONAL MATCH (s)-[:HAS_EPISODE]->(e:Episode {userId: $userId}) + + WITH s, count(e) as episodeCount + RETURN s, episodeCount `; const result = await runQuery(query, { spaceId, userId }); @@ -71,7 +70,7 @@ export async function getSpace( } const spaceData = result[0].get("s").properties; - const statementCount = result[0].get("statementCount") || 0; + const episodeCount = result[0].get("episodeCount") || 0; return { uuid: spaceData.uuid, @@ -81,7 +80,7 @@ export async function getSpace( createdAt: new Date(spaceData.createdAt), updatedAt: new Date(spaceData.updatedAt), isActive: spaceData.isActive, - statementCount: Number(statementCount), + contextCount: Number(episodeCount), // Episode count = context count }; } @@ -151,28 +150,45 @@ export async function deleteSpace( } // 2. Clean up statement references (remove spaceId from spaceIds arrays) - const cleanupQuery = ` + const cleanupStatementsQuery = ` MATCH (s:Statement {userId: $userId}) WHERE s.spaceIds IS NOT NULL AND $spaceId IN s.spaceIds SET s.spaceIds = [id IN s.spaceIds WHERE id <> $spaceId] RETURN count(s) as updatedStatements `; - const cleanupResult = await runQuery(cleanupQuery, { userId, spaceId }); - const updatedStatements = cleanupResult[0]?.get("updatedStatements") || 0; + const cleanupStatementsResult = await runQuery(cleanupStatementsQuery, { userId, spaceId }); + const updatedStatements = cleanupStatementsResult[0]?.get("updatedStatements") || 0; - // 3. Delete the space node + // 3. Clean up episode references (remove spaceId from spaceIds arrays) + const cleanupEpisodesQuery = ` + MATCH (e:Episode {userId: $userId}) + WHERE e.spaceIds IS NOT NULL AND $spaceId IN e.spaceIds + SET e.spaceIds = [id IN e.spaceIds WHERE id <> $spaceId] + RETURN count(e) as updatedEpisodes + `; + + const cleanupEpisodesResult = await runQuery(cleanupEpisodesQuery, { userId, spaceId }); + const updatedEpisodes = cleanupEpisodesResult[0]?.get("updatedEpisodes") || 0; + + // 4. Delete the space node and all its relationships const deleteQuery = ` MATCH (space:Space {uuid: $spaceId, userId: $userId}) - DELETE space + DETACH DELETE space RETURN count(space) as deletedSpaces `; await runQuery(deleteQuery, { userId, spaceId }); + logger.info(`Deleted space ${spaceId}`, { + userId, + statementsUpdated: updatedStatements, + episodesUpdated: updatedEpisodes, + }); + return { deleted: true, - statementsUpdated: Number(updatedStatements), + statementsUpdated: Number(updatedStatements) + Number(updatedEpisodes), }; } catch (error) { return { @@ -319,127 +335,6 @@ export async function getSpaceStatementCount( return Number(result[0]?.get("statementCount") || 0); } -/** - * Check if a space should trigger pattern analysis based on growth thresholds - */ -export async function shouldTriggerSpacePattern( - spaceId: string, - userId: string, -): Promise<{ - shouldTrigger: boolean; - isNewSpace: boolean; - currentCount: number; -}> { - try { - // Get current statement count from Neo4j - const currentCount = await getSpaceStatementCount(spaceId, userId); - - // Get space data from PostgreSQL - const space = await prisma.space.findUnique({ - where: { id: spaceId }, - select: { - lastPatternTrigger: true, - statementCountAtLastTrigger: true, - }, - }); - - if (!space) { - logger.warn(`Space ${spaceId} not found when checking pattern trigger`); - return { shouldTrigger: false, isNewSpace: false, currentCount }; - } - - const isNewSpace = !space.lastPatternTrigger; - const previousCount = space.statementCountAtLastTrigger || 0; - const growth = currentCount - previousCount; - - // Trigger if: new space OR growth >= 100 statements - const shouldTrigger = isNewSpace || growth >= 100; - - logger.info(`Space pattern trigger check`, { - spaceId, - currentCount, - previousCount, - growth, - isNewSpace, - shouldTrigger, - }); - - return { shouldTrigger, isNewSpace, currentCount }; - } catch (error) { - logger.error(`Error checking space pattern trigger:`, { - error, - spaceId, - userId, - }); - return { shouldTrigger: false, isNewSpace: false, currentCount: 0 }; - } -} - -/** - * Atomically update pattern trigger timestamp and statement count to prevent race conditions - */ -export async function atomicUpdatePatternTrigger( - spaceId: string, - currentCount: number, -): Promise<{ updated: boolean; isNewSpace: boolean } | null> { - try { - // Use a transaction to atomically check and update - const result = await prisma.$transaction(async (tx) => { - // Get current state - const space = await tx.space.findUnique({ - where: { id: spaceId }, - select: { - lastPatternTrigger: true, - statementCountAtLastTrigger: true, - }, - }); - - if (!space) { - throw new Error(`Space ${spaceId} not found`); - } - - const isNewSpace = !space.lastPatternTrigger; - const previousCount = space.statementCountAtLastTrigger || 0; - const growth = currentCount - previousCount; - - // Double-check if we still need to trigger (race condition protection) - const shouldTrigger = isNewSpace || growth >= 100; - - if (!shouldTrigger) { - return { updated: false, isNewSpace: false }; - } - - // Update the trigger timestamp and count atomically - await tx.space.update({ - where: { id: spaceId }, - data: { - lastPatternTrigger: new Date(), - statementCountAtLastTrigger: currentCount, - }, - }); - - logger.info(`Atomically updated pattern trigger for space`, { - spaceId, - previousCount, - currentCount, - growth, - isNewSpace, - }); - - return { updated: true, isNewSpace }; - }); - - return result; - } catch (error) { - logger.error(`Error in atomic pattern trigger update:`, { - error, - spaceId, - currentCount, - }); - return null; - } -} - /** * Initialize spaceIds array for existing statements (migration helper) */ @@ -463,3 +358,153 @@ export async function initializeStatementSpaceIds( const result = await runQuery(query, userId ? { userId } : {}); return Number(result[0]?.get("updated") || 0); } + +/** + * Assign episodes to a space using intent-based matching + */ +export async function assignEpisodesToSpace( + episodeIds: string[], + spaceId: string, + userId: string, +): Promise { + try { + // Verify space exists and belongs to user + const space = await getSpace(spaceId, userId); + if (!space) { + return { + success: false, + statementsUpdated: 0, + error: "Space not found or access denied", + }; + } + + // Update episodes with spaceIds array AND create HAS_EPISODE relationships + // This hybrid approach enables both fast array lookups and graph traversal + const query = ` + MATCH (space:Space {uuid: $spaceId, userId: $userId}) + MATCH (e:Episode {userId: $userId}) + WHERE e.uuid IN $episodeIds + SET e.spaceIds = CASE + WHEN e.spaceIds IS NULL THEN [$spaceId] + WHEN $spaceId IN e.spaceIds THEN e.spaceIds + ELSE e.spaceIds + [$spaceId] + END, + e.lastSpaceAssignment = datetime(), + e.spaceAssignmentMethod = CASE + WHEN e.spaceAssignmentMethod IS NULL THEN 'intent_based' + ELSE e.spaceAssignmentMethod + END + WITH e, space + MERGE (space)-[r:HAS_EPISODE]->(e) + ON CREATE SET + r.assignedAt = datetime(), + r.assignmentMethod = 'intent_based' + RETURN count(e) as updated + `; + + const result = await runQuery(query, { episodeIds, spaceId, userId }); + const updatedCount = result[0]?.get("updated") || 0; + + logger.info(`Assigned ${updatedCount} episodes to space ${spaceId}`, { + episodeIds: episodeIds.length, + userId, + }); + + return { + success: true, + statementsUpdated: Number(updatedCount), + }; + } catch (error) { + logger.error(`Error assigning episodes to space:`, { + error, + spaceId, + episodeIds: episodeIds.length, + }); + return { + success: false, + statementsUpdated: 0, + error: error instanceof Error ? error.message : "Unknown error", + }; + } +} + +/** + * Remove episodes from a space + */ +export async function removeEpisodesFromSpace( + episodeIds: string[], + spaceId: string, + userId: string, +): Promise { + try { + // Remove from both spaceIds array and HAS_EPISODE relationship + const query = ` + MATCH (e:Episode {userId: $userId}) + WHERE e.uuid IN $episodeIds AND e.spaceIds IS NOT NULL AND $spaceId IN e.spaceIds + SET e.spaceIds = [id IN e.spaceIds WHERE id <> $spaceId] + WITH e + MATCH (space:Space {uuid: $spaceId, userId: $userId})-[r:HAS_EPISODE]->(e) + DELETE r + RETURN count(e) as updated + `; + + const result = await runQuery(query, { episodeIds, spaceId, userId }); + const updatedCount = result[0]?.get("updated") || 0; + + return { + success: true, + statementsUpdated: Number(updatedCount), + }; + } catch (error) { + return { + success: false, + statementsUpdated: 0, + error: error instanceof Error ? error.message : "Unknown error", + }; + } +} + +/** + * Get all episodes in a space + */ +export async function getSpaceEpisodes(spaceId: string, userId: string) { + const query = ` + MATCH (space:Space {uuid: $spaceId, userId: $userId})-[:HAS_EPISODE]->(e:Episode {userId: $userId}) + RETURN e + ORDER BY e.createdAt DESC + `; + + const result = await runQuery(query, { spaceId, userId }); + + return result.map((record) => { + const episode = record.get("e").properties; + return { + uuid: episode.uuid, + content: episode.content, + originalContent: episode.originalContent, + source: episode.source, + createdAt: new Date(episode.createdAt), + validAt: new Date(episode.validAt), + metadata: JSON.parse(episode.metadata || "{}"), + sessionId: episode.sessionId, + }; + }); +} + +/** + * Get episode count for a space + */ +export async function getSpaceEpisodeCount( + spaceId: string, + userId: string, +): Promise { + // Use spaceIds array for faster lookup instead of relationship traversal + const query = ` + MATCH (e:Episode {userId: $userId}) + WHERE e.spaceIds IS NOT NULL AND $spaceId IN e.spaceIds + RETURN count(e) as episodeCount + `; + + const result = await runQuery(query, { spaceId, userId }); + return Number(result[0]?.get("episodeCount") || 0); +} diff --git a/apps/webapp/app/services/space.server.ts b/apps/webapp/app/services/space.server.ts index f28cc1f..bfc4fa7 100644 --- a/apps/webapp/app/services/space.server.ts +++ b/apps/webapp/app/services/space.server.ts @@ -9,6 +9,7 @@ import { type Space } from "@prisma/client"; import { triggerSpaceAssignment } from "~/trigger/spaces/space-assignment"; import { + assignEpisodesToSpace, assignStatementsToSpace, createSpace, deleteSpace, @@ -182,13 +183,6 @@ export class SpaceService { } } - if ( - updates.description !== undefined && - updates.description.length > 1000 - ) { - throw new Error("Space description too long (max 1000 characters)"); - } - const space = await prisma.space.update({ where: { id: spaceId, @@ -232,6 +226,58 @@ export class SpaceService { return space; } + /** + * Reset a space by clearing all episode assignments, summary, and metadata + */ + async resetSpace(spaceId: string, userId: string): Promise { + logger.info(`Resetting space ${spaceId} for user ${userId}`); + + // Get the space first to verify it exists and get its details + const space = await prisma.space.findUnique({ + where: { + id: spaceId, + }, + }); + + if (!space) { + throw new Error("Space not found"); + } + + if (space.name === "Profile") { + throw new Error("Cannot reset Profile space"); + } + + // Delete all relationships in Neo4j (episodes, statements, etc.) + await deleteSpace(spaceId, userId); + + // Recreate the space in Neo4j (clean slate) + await createSpace( + space.id, + space.name.trim(), + space.description?.trim(), + userId, + ); + + // Reset all summary and metadata fields in PostgreSQL + const resetSpace = await prisma.space.update({ + where: { + id: spaceId, + }, + data: { + summary: null, + themes: [], + contextCount: null, + status: "pending", + summaryGeneratedAt: null, + lastPatternTrigger: null, + }, + }); + + logger.info(`Reset space ${spaceId} successfully`); + + return resetSpace; + } + /** * Assign statements to a space */ @@ -310,12 +356,60 @@ export class SpaceService { /** * Get all statements in a space + * @deprecated Use getSpaceEpisodes instead - spaces now work with episodes */ async getSpaceStatements(spaceId: string, userId: string) { logger.info(`Fetching statements for space ${spaceId} for user ${userId}`); return await getSpaceStatements(spaceId, userId); } + /** + * Get all episodes in a space + */ + async getSpaceEpisodes(spaceId: string, userId: string) { + logger.info(`Fetching episodes for space ${spaceId} for user ${userId}`); + const { getSpaceEpisodes } = await import("./graphModels/space"); + return await getSpaceEpisodes(spaceId, userId); + } + + /** + * Assign episodes to a space + */ + async assignEpisodesToSpace( + episodeIds: string[], + spaceId: string, + userId: string, + ) { + logger.info( + `Assigning ${episodeIds.length} episodes to space ${spaceId} for user ${userId}`, + ); + + await assignEpisodesToSpace(episodeIds,spaceId, userId); + + logger.info( + `Successfully assigned ${episodeIds.length} episodes to space ${spaceId}`, + ); + } + + /** + * Remove episodes from a space + */ + async removeEpisodesFromSpace( + episodeIds: string[], + spaceId: string, + userId: string, + ) { + logger.info( + `Removing ${episodeIds.length} episodes from space ${spaceId} for user ${userId}`, + ); + + await this.removeEpisodesFromSpace(episodeIds, spaceId, userId); + + logger.info( + `Successfully removed ${episodeIds.length} episodes from space ${spaceId}`, + ); + } + /** * Search spaces by name */ diff --git a/apps/webapp/app/trigger/spaces/space-assignment.ts b/apps/webapp/app/trigger/spaces/space-assignment.ts index 47a1ce6..2888f52 100644 --- a/apps/webapp/app/trigger/spaces/space-assignment.ts +++ b/apps/webapp/app/trigger/spaces/space-assignment.ts @@ -5,10 +5,8 @@ import { makeModelCall } from "~/lib/model.server"; import { createBatch, getBatch } from "~/lib/batch.server"; import { runQuery } from "~/lib/neo4j.server"; import { - assignStatementsToSpace, - shouldTriggerSpacePattern, - atomicUpdatePatternTrigger, - getSpaceStatementCount, + assignEpisodesToSpace, + getSpaceEpisodeCount, } from "~/services/graphModels/space"; import { triggerSpaceSummary } from "./space-summary"; import { triggerSpacePattern } from "./space-pattern"; @@ -29,25 +27,24 @@ interface SpaceAssignmentPayload { batchSize?: number; // Processing batch size } -interface StatementData { +interface EpisodeData { uuid: string; - fact: string; - subject: string; - predicate: string; - object: string; + content: string; + originalContent: string; + source: string; createdAt: Date; - spaceIds: string[]; + metadata: any; } interface SpaceData { uuid: string; name: string; description?: string; - statementCount: number; + episodeCount: number; } interface AssignmentResult { - statementId: string; + episodeId: string; spaceIds: string[]; confidence: number; reasoning?: string; @@ -55,123 +52,29 @@ interface AssignmentResult { const CONFIG = { newSpaceMode: { - batchSize: 200, - confidenceThreshold: 0.85, // High quality threshold for new space creation + batchSize: 20, + confidenceThreshold: 0.75, // Intent-based threshold for new space creation useBatchAPI: true, // Use batch API for new space mode - minStatementsForBatch: 10, // Minimum statements to use batch API + minEpisodesForBatch: 5, // Minimum episodes to use batch API }, episodeMode: { - batchSize: 200, - confidenceThreshold: 0.85, // Strict threshold for theme validation (5+ statements) + batchSize: 20, + confidenceThreshold: 0.75, // Intent-based threshold for episode assignment useBatchAPI: true, // Use batch API for episode mode - minStatementsForBatch: 5, // Minimum statements to use batch API + minEpisodesForBatch: 5, // Minimum episodes to use batch API }, }; // Zod schema for LLM response validation const AssignmentResultSchema = z.array( z.object({ - statementId: z.string(), + episodeId: z.string(), addSpaceId: z.array(z.string()), confidence: z.number(), + reasoning: z.string(), }), ); -/** - * Check and trigger space patterns for spaces that meet growth thresholds - */ -async function checkAndTriggerSpacePatterns( - affectedSpaces: Set, - userId: string, - workspaceId: string, -): Promise { - if (affectedSpaces.size === 0) return; - - logger.info( - `Checking pattern triggers for ${affectedSpaces.size} affected spaces`, - { - userId, - spaceIds: Array.from(affectedSpaces), - }, - ); - - const patternPromises = Array.from(affectedSpaces).map(async (spaceId) => { - try { - // Check if this space should trigger pattern analysis - const triggerCheck = await shouldTriggerSpacePattern(spaceId, userId); - - if (triggerCheck.shouldTrigger) { - // Atomically update the trigger timestamp to prevent race conditions - const updateResult = await atomicUpdatePatternTrigger( - spaceId, - triggerCheck.currentCount, - ); - - if (updateResult?.updated) { - const triggerSource = updateResult.isNewSpace - ? "new_space" - : "growth_threshold"; - - logger.info(`Triggering space pattern analysis`, { - spaceId, - triggerSource, - currentCount: triggerCheck.currentCount, - isNewSpace: updateResult.isNewSpace, - }); - - await triggerSpacePattern({ - userId, - workspaceId, - spaceId, - triggerSource: triggerSource as "new_space" | "growth_threshold", - }); - - return { success: true, spaceId, triggerSource }; - } else { - logger.info(`Pattern trigger update failed or no longer needed`, { - spaceId, - triggerCheck, - }); - return { success: false, spaceId, reason: "update_failed" }; - } - } else { - logger.info(`Space does not meet pattern trigger criteria`, { - spaceId, - currentCount: triggerCheck.currentCount, - isNewSpace: triggerCheck.isNewSpace, - }); - return { success: false, spaceId, reason: "threshold_not_met" }; - } - } catch (error) { - logger.error(`Error checking pattern trigger for space ${spaceId}:`, { - error, - userId, - }); - return { - success: false, - spaceId, - error: error instanceof Error ? error.message : "unknown_error", - }; - } - }); - - const results = await Promise.allSettled(patternPromises); - const successful = results.filter( - (r) => r.status === "fulfilled" && r.value.success, - ).length; - const failed = results.filter( - (r) => - r.status === "rejected" || (r.status === "fulfilled" && !r.value.success), - ).length; - - logger.info(`Pattern trigger check completed`, { - userId, - totalSpaces: affectedSpaces.size, - successful, - failed, - }); -} - export const spaceAssignmentTask = task({ id: "space-assignment", maxDuration: 1800, // 15 minutes timeout @@ -210,38 +113,38 @@ export const spaceAssignmentTask = task({ }; } - // 2. Get statements to analyze based on mode - const statements = await getStatementsToAnalyze(userId, mode, { + // 2. Get episodes to analyze based on mode + const episodes = await getEpisodesToAnalyze(userId, mode, { newSpaceId, episodeIds, }); - if (statements.length === 0) { + if (episodes.length === 0) { logger.info( - `No statements to analyze for user ${userId} in ${mode} mode`, + `No episodes to analyze for user ${userId} in ${mode} mode`, ); return { success: true, - message: "No statements to analyze", + message: "No episodes to analyze", processed: 0, }; } - // 3. Process statements using batch AI or fallback to sequential + // 3. Process episodes using batch AI or fallback to sequential const config = mode === "new_space" ? CONFIG.newSpaceMode : CONFIG.episodeMode; // const shouldUseBatchAPI = - // config.useBatchAPI && statements.length >= config.minStatementsForBatch; + // config.useBatchAPI && episodes.length >= config.minEpisodesForBatch; const shouldUseBatchAPI = true; let totalProcessed = 0; let totalAssignments = 0; let totalBatches = 0; - const affectedSpaces = new Set(); // Track spaces that received new statements + const affectedSpaces = new Set(); // Track spaces that received new episodes if (shouldUseBatchAPI) { logger.info( - `Using Batch AI processing for ${statements.length} statements`, + `Using Batch AI processing for ${episodes.length} episodes`, { mode, userId, @@ -250,7 +153,7 @@ export const spaceAssignmentTask = task({ ); const batchResult = await processBatchAI( - statements, + episodes, spaces, userId, mode, @@ -264,22 +167,22 @@ export const spaceAssignmentTask = task({ ); } else { logger.info( - `Using sequential processing for ${statements.length} statements (below batch threshold)`, + `Using sequential processing for ${episodes.length} episodes (below batch threshold)`, { mode, userId, - minRequired: config.minStatementsForBatch, + minRequired: config.minEpisodesForBatch, }, ); - // Fallback to sequential processing for smaller statement sets - totalBatches = Math.ceil(statements.length / batchSize); + // Fallback to sequential processing for smaller episode sets + totalBatches = Math.ceil(episodes.length / batchSize); for (let i = 0; i < totalBatches; i++) { - const batch = statements.slice(i * batchSize, (i + 1) * batchSize); + const batch = episodes.slice(i * batchSize, (i + 1) * batchSize); logger.info( - `Processing batch ${i + 1}/${totalBatches} with ${batch.length} statements`, + `Processing batch ${i + 1}/${totalBatches} with ${batch.length} episodes`, { mode, userId, @@ -384,32 +287,7 @@ export const spaceAssignmentTask = task({ } } - // 5. Check and trigger space patterns for qualifying spaces (new spaces or 100+ growth) - if (affectedSpaces.size > 0) { - try { - logger.info(`Checking pattern triggers for affected spaces`, { - userId, - mode, - affectedSpacesCount: affectedSpaces.size, - }); - - await checkAndTriggerSpacePatterns( - affectedSpaces, - userId, - workspaceId, - ); - } catch (patternError) { - // Don't fail the assignment if pattern triggering fails - logger.warn(`Failed to trigger space patterns after assignment:`, { - error: patternError, - userId, - mode, - affectedSpaces: Array.from(affectedSpaces), - }); - } - } - - // 7. Update space status to "ready" after all processing is complete + // 6. Update space status to "ready" after all processing is complete if (affectedSpaces.size > 0) { try { await updateMultipleSpaceStatuses( @@ -451,36 +329,29 @@ export const spaceAssignmentTask = task({ }, }); -async function getStatementsToAnalyze( +async function getEpisodesToAnalyze( userId: string, mode: "new_space" | "episode", options: { newSpaceId?: string; episodeIds?: string[] }, -): Promise { +): Promise { let query: string; let params: any = { userId }; if (mode === "new_space") { - // For new space: analyze all statements (or recent ones) + // For new space: analyze all recent episodes query = ` - MATCH (s:Statement) - WHERE s.userId = $userId - MATCH (s)-[:HAS_SUBJECT]->(subj:Entity) - MATCH (s)-[:HAS_PREDICATE]->(pred:Entity) - MATCH (s)-[:HAS_OBJECT]->(obj:Entity) - RETURN s, subj.name as subject, pred.name as predicate, obj.name as object - ORDER BY s.createdAt DESC + MATCH (e:Episode {userId: $userId}) + RETURN e + ORDER BY e.createdAt DESC + LIMIT 1000 `; } else { - // Optimized query: Use UNWIND for better performance with IN clause - // and combine entity lookups in single pattern + // For episode mode: analyze specific episodes query = ` UNWIND $episodeIds AS episodeId - MATCH (e:Episode {uuid: episodeId, userId: $userId})-[:HAS_PROVENANCE]->(s:Statement) - MATCH (s)-[:HAS_SUBJECT]->(subj:Entity), - (s)-[:HAS_PREDICATE]->(pred:Entity), - (s)-[:HAS_OBJECT]->(obj:Entity) - RETURN s, subj.name as subject, pred.name as predicate, obj.name as object - ORDER BY s.createdAt DESC + MATCH (e:Episode {uuid: episodeId, userId: $userId}) + RETURN e + ORDER BY e.createdAt DESC `; params.episodeIds = options.episodeIds; } @@ -488,21 +359,20 @@ async function getStatementsToAnalyze( const result = await runQuery(query, params); return result.map((record) => { - const statement = record.get("s").properties; + const episode = record.get("e").properties; return { - uuid: statement.uuid, - fact: statement.fact, - subject: record.get("subject"), - predicate: record.get("predicate"), - object: record.get("object"), - createdAt: new Date(statement.createdAt), - spaceIds: statement.spaceIds || [], + uuid: episode.uuid, + content: episode.content, + originalContent: episode.originalContent, + source: episode.source, + createdAt: new Date(episode.createdAt), + metadata: JSON.parse(episode.metadata || "{}"), }; }); } async function processBatchAI( - statements: StatementData[], + episodes: EpisodeData[], spaces: Space[], userId: string, mode: "new_space" | "episode", @@ -514,19 +384,19 @@ async function processBatchAI( affectedSpaces?: string[]; }> { try { - // Create batches of statements - const statementBatches: StatementData[][] = []; - for (let i = 0; i < statements.length; i += batchSize) { - statementBatches.push(statements.slice(i, i + batchSize)); + // Create batches of episodes + const episodeBatches: EpisodeData[][] = []; + for (let i = 0; i < episodes.length; i += batchSize) { + episodeBatches.push(episodes.slice(i, i + batchSize)); } logger.info( - `Creating ${statementBatches.length} batch AI requests for ${statements.length} statements`, + `Creating ${episodeBatches.length} batch AI requests for ${episodes.length} episodes`, ); // Create batch requests with prompts const batchRequests = await Promise.all( - statementBatches.map(async (batch, index) => { + episodeBatches.map(async (batch, index) => { const promptMessages = await createLLMPrompt( batch, spaces, @@ -540,7 +410,7 @@ async function processBatchAI( promptMessages.find((m) => m.role === "user")?.content || ""; return { - customId: `space-assignment-${userId}-${mode}-${index}`, + customId: `episode-space-assignment-${userId}-${mode}-${index}`, messages: [{ role: "user" as const, content: userPrompt }], systemPrompt, }; @@ -627,10 +497,10 @@ async function processBatchAI( logger.info( `Falling back to sequential processing due to batch failures`, ); - return await processBatch(statements, spaces, userId, mode, newSpaceId); + return await processBatch(episodes, spaces, userId, mode, newSpaceId); } - return { processed: statements.length, assignments: 0 }; + return { processed: episodes.length, assignments: 0 }; } logger.info(`Processing batch results`, { @@ -666,22 +536,24 @@ async function processBatchAI( // Parse assignments from this batch result let assignments: AssignmentResult[] = []; try { - // Extract statement batch info from customId - const [, , , batchIndex] = result.customId.split("-"); - const statementBatch = statementBatches[parseInt(batchIndex)]; + // Extract episode batch info from customId + const batchIndexMatch = result.customId.match(/-(\d+)$/); + const batchIndex = batchIndexMatch ? parseInt(batchIndexMatch[1]) : 0; + const episodeBatch = episodeBatches[batchIndex]; if (Array.isArray(result.response)) { // Handle direct array response (from structured output) assignments = result.response.map((a) => ({ - statementId: a.statementId, + episodeId: a.episodeId, spaceIds: a.addSpaceId || [], - confidence: a.confidence || 0.8, + confidence: a.confidence || 0.75, + reasoning: a.reasoning, })); } else if (typeof result.response === "string") { // Parse from text response with tags (fallback for non-structured output) assignments = parseLLMResponseWithTags( result.response, - statementBatch, + episodeBatch, spaces, ); } else if (typeof result.response === "object" && result.response) { @@ -694,15 +566,16 @@ async function processBatchAI( if (Array.isArray(responseData)) { assignments = responseData.map((a) => ({ - statementId: a.statementId, + episodeId: a.episodeId, spaceIds: a.addSpaceId || [], - confidence: a.confidence || 0.8, + confidence: a.confidence || 0.75, + reasoning: a.reasoning, })); } else { // Fallback parsing assignments = parseLLMResponse( JSON.stringify(result.response), - statementBatch, + episodeBatch, spaces, ); } @@ -717,7 +590,7 @@ async function processBatchAI( // Fallback parsing assignments = parseLLMResponse( JSON.stringify(result.response), - statementBatch, + episodeBatch, spaces, ); } @@ -728,8 +601,8 @@ async function processBatchAI( continue; } - // Group statements by space for batch assignment - const spaceToStatements = new Map(); + // Group episodes by space for batch assignment + const spaceToEpisodes = new Map(); for (const assignment of assignments) { if ( @@ -737,30 +610,30 @@ async function processBatchAI( assignment.confidence >= confidenceThreshold ) { for (const spaceId of assignment.spaceIds) { - if (!spaceToStatements.has(spaceId)) { - spaceToStatements.set(spaceId, []); + if (!spaceToEpisodes.has(spaceId)) { + spaceToEpisodes.set(spaceId, []); } - spaceToStatements.get(spaceId)!.push(assignment.statementId); + spaceToEpisodes.get(spaceId)!.push(assignment.episodeId); } } } // Apply batch assignments - one call per space - for (const [spaceId, statementIds] of spaceToStatements) { + for (const [spaceId, episodeIds] of spaceToEpisodes) { try { - const assignmentResult = await assignStatementsToSpace( - statementIds, + const assignmentResult = await assignEpisodesToSpace( + episodeIds, spaceId, userId, ); if (assignmentResult.success) { - totalAssignments += statementIds.length; + totalAssignments += episodeIds.length; affectedSpaces.add(spaceId); logger.info( - `Batch AI assigned ${statementIds.length} statements to space ${spaceId}`, + `Batch AI assigned ${episodeIds.length} episodes to space ${spaceId}`, { - statementIds, + episodeIds, mode, batchId: result.customId, }, @@ -768,8 +641,8 @@ async function processBatchAI( } } catch (error) { logger.warn( - `Failed to assign ${statementIds.length} statements to space ${spaceId}:`, - { error, statementIds }, + `Failed to assign ${episodeIds.length} episodes to space ${spaceId}:`, + { error, episodeIds }, ); } } @@ -778,7 +651,7 @@ async function processBatchAI( // Log final batch processing results logger.info(`Batch AI processing completed`, { batchId, - totalStatements: statements.length, + totalEpisodes: episodes.length, processedBatches: batch.results.length, totalAssignments, affectedSpaces: affectedSpaces.size, @@ -786,7 +659,7 @@ async function processBatchAI( failedRequests: batch.failedRequests || 0, }); - // If we have significant failures, consider fallback processing for remaining statements + // If we have significant failures, consider fallback processing for remaining episodes const failureRate = batch.failedRequests ? batch.failedRequests / batch.totalRequests : 0; @@ -798,7 +671,7 @@ async function processBatchAI( } return { - processed: statements.length, + processed: episodes.length, assignments: totalAssignments, affectedSpaces: Array.from(affectedSpaces), }; @@ -809,7 +682,7 @@ async function processBatchAI( } async function processBatch( - statements: StatementData[], + episodes: EpisodeData[], spaces: Space[], userId: string, mode: "new_space" | "episode", @@ -822,25 +695,25 @@ async function processBatch( try { // Create the LLM prompt based on mode const prompt = await createLLMPrompt( - statements, + episodes, spaces, mode, newSpaceId, userId, ); - // Space assignment is LOW complexity (rule-based classification with confidence scores) + // Episode-intent matching is MEDIUM complexity (semantic analysis with intent alignment) let responseText = ""; await makeModelCall(false, prompt, (text: string) => { responseText = text; - }, undefined, 'low'); + }, undefined, 'high'); // Response text is now set by the callback // Parse LLM response const assignments = parseLLMResponseWithTags( responseText, - statements, + episodes, spaces, ); @@ -860,8 +733,8 @@ async function processBatch( // Assign to each space individually to track metadata properly for (const spaceId of assignment.spaceIds) { try { - const result = await assignStatementsToSpace( - [assignment.statementId], + const result = await assignEpisodesToSpace( + [assignment.episodeId], spaceId, userId, ); @@ -871,7 +744,7 @@ async function processBatch( affectedSpaces.add(spaceId); logger.info( - `LLM assigned statement ${assignment.statementId} to space ${spaceId}`, + `LLM assigned episode ${assignment.episodeId} to space ${spaceId}`, { confidence: assignment.confidence, reasoning: assignment.reasoning || "No reasoning", @@ -881,7 +754,7 @@ async function processBatch( } } catch (error) { logger.warn( - `Failed to assign statement ${assignment.statementId} to space ${spaceId}:`, + `Failed to assign episode ${assignment.episodeId} to space ${spaceId}:`, error as Record, ); } @@ -890,7 +763,7 @@ async function processBatch( } return { - processed: statements.length, + processed: episodes.length, assignments: totalAssignments, affectedSpaces: Array.from(affectedSpaces), }; @@ -901,28 +774,28 @@ async function processBatch( } async function createLLMPrompt( - statements: StatementData[], + episodes: EpisodeData[], spaces: Space[], mode: "new_space" | "episode", newSpaceId?: string, userId?: string, ): Promise { - const statementsDescription = statements + const episodesDescription = episodes .map( - (stmt) => - `ID: ${stmt.uuid}\nFACT: ${stmt.fact}\nCURRENT_SPACES: [${stmt.spaceIds.join(", ")}]`, + (ep) => + `ID: ${ep.uuid}\nCONTENT: ${ep.content}\nSOURCE: ${ep.source}\nMETADATA: ${JSON.stringify(ep.metadata)}`, ) .join("\n\n"); - // Get enhanced space information with statement counts + // Get enhanced space information with episode counts const enhancedSpaces = await Promise.all( spaces.map(async (space) => { const currentCount = userId - ? await getSpaceStatementCount(space.id, userId) + ? await getSpaceEpisodeCount(space.id, userId) : 0; return { ...space, - currentStatementCount: currentCount, + currentEpisodeCount: currentCount, }; }), ); @@ -937,14 +810,71 @@ async function createLLMPrompt( return [ { role: "system", - content: `You are analyzing statements for assignment to a newly created space. Focus on semantic relevance to this specific space. + content: `You are analyzing episodes for assignment to a newly created space based on the space's intent and purpose. -INSTRUCTIONS: -1. Analyze each statement's meaning in relation to the new space -2. Only assign statements that are genuinely relevant (confidence >= 0.85) -3. Consider semantic meaning, context, and conceptual fit -4. Be selective - it's better to miss some than include irrelevant ones -5. If a statement doesn't fit the space well, use empty addSpaceId: [] +CORE PRINCIPLE: Match episodes based on WHAT THE EPISODE IS FUNDAMENTALLY ABOUT (its primary subject), not just keyword overlap. + +STEP-BY-STEP FILTERING PROCESS: + +Step 1: IDENTIFY PRIMARY SUBJECT +Ask: "Who or what is this episode fundamentally about?" +- Is it about a specific person? (by name, or "I"/"my" = speaker) +- Is it about a system, tool, or organization? +- Is it about a project, event, or activity? +- Is it about a concept, topic, or idea? + +Step 2: HANDLE IMPLICIT SUBJECTS +- "I prefer..." or "My..." → Subject is the SPEAKER (check episode source/metadata for identity) +- "User discussed..." or "Person X said..." → Subject is that specific person +- "We decided..." → Subject is the group/team/project being discussed +- If unclear, identify from context clues in the episode content + +Step 3: CHECK SUBJECT ALIGNMENT +Does the PRIMARY SUBJECT match what the space is about? +- Match the subject identity (right person/thing/concept?) +- Match the subject relationship (is episode ABOUT the subject or just MENTIONING it?) +- Match the intent purpose (does episode serve the space's purpose?) +- Check scope constraints: If space description includes scope requirements (e.g., "cross-context", "not app-specific", "broadly useful", "stable for 3+ months"), verify episode meets those constraints + +Step 4: DISTINGUISH SUBJECT vs META +Ask: "Is this episode ABOUT the subject itself, or ABOUT discussing/analyzing the subject?" +- ABOUT subject: Episode contains actual content related to subject +- META-discussion: Episode discusses how to handle/analyze/organize the subject +- Only assign if episode is ABOUT the subject, not meta-discussion + +Step 5: VERIFY CONFIDENCE +Only assign if confidence >= 0.75 based on: +- Subject identity clarity (is subject clearly identified?) +- Subject alignment strength (how well does it match space intent?) +- Content relevance (does episode content serve space purpose?) + +CRITICAL RULE: PRIMARY SUBJECT MATCHING +The episode's PRIMARY SUBJECT must match the space's target subject. +- If space is about Person A, episodes about Person B should NOT match (even if same topic) +- If space is about a specific concept, meta-discussions about that concept should NOT match +- If space is about actual behaviors/facts, process discussions about organizing those facts should NOT match + +EXAMPLES OF CORRECT FILTERING: + +Example 1 - Person Identity: +Space: "Alex's work preferences" +Episode A: "I prefer morning meetings and async updates" (speaker: Alex) → ASSIGN ✅ (primary subject: Alex's preferences) +Episode B: "Jordan prefers afternoon meetings" (speaker: System) → DO NOT ASSIGN ❌ (primary subject: Jordan, not Alex) + +Example 2 - Meta vs Actual: +Space: "Recipe collection" +Episode A: "My lasagna recipe: 3 layers pasta, béchamel, meat sauce..." → ASSIGN ✅ (primary subject: actual recipe) +Episode B: "We should organize recipes by cuisine type" → DO NOT ASSIGN ❌ (primary subject: organizing system, not recipe) + +Example 3 - Keyword Overlap Without Subject Match: +Space: "Home renovation project" +Episode A: "Installed new kitchen cabinets, chose oak wood" → ASSIGN ✅ (primary subject: home renovation) +Episode B: "Friend asked advice about their kitchen renovation" → DO NOT ASSIGN ❌ (primary subject: friend's project, not this home) + +Example 4 - Scope Constraints: +Space: "Personal identity and preferences (broadly useful across contexts, not app-specific)" +Episode A: "I prefer async communication and morning work hours" → ASSIGN ✅ (cross-context preference, broadly applicable) +Episode B: "Demonstrated knowledge of ProjectX technical stack" → DO NOT ASSIGN ❌ (work/project knowledge, not personal identity) RESPONSE FORMAT: Provide your response inside tags with a valid JSON array: @@ -952,51 +882,54 @@ Provide your response inside tags with a valid JSON array: [ { - "statementId": "statement-uuid", + "episodeId": "episode-uuid", "addSpaceId": ["${newSpaceId}"], - "confidence": 0.85, + "confidence": 0.75, + "reasoning": "Brief explanation of intent match" } ] -IMPORTANT: If a statement doesn't fit the space well, use empty addSpaceId array: [] -Example of non-relevant statement: {"statementId": "stmt-123", "addSpaceId": [], "confidence": 0.0}`, +IMPORTANT: If an episode doesn't align with the space's intent, use empty addSpaceId array: [] +Example: {"episodeId": "ep-123", "addSpaceId": [], "confidence": 0.0, "reasoning": "No intent alignment"}`, }, { role: "user", content: `NEW SPACE TO POPULATE: Name: ${newSpace.name} -Description: ${newSpace.description || "No description"} -Current Statement Count: ${newSpace.currentStatementCount} -${newSpace.summary ? `Current Summary: ${newSpace.summary}` : ""} -${newSpace.themes && newSpace.themes.length > 0 ? `Existing Themes: ${newSpace.themes.join(", ")}` : ""} +Intent/Purpose: ${newSpace.description || "No description"} +Current Episodes: ${newSpace.currentEpisodeCount} -STATEMENTS TO EVALUATE: -${statementsDescription} +EPISODES TO EVALUATE: +${episodesDescription} -Which statements are semantically relevant to "${newSpace.name}"? Focus on meaning and context. -Only assign statements with confidence >= 0.85.`, +ASSIGNMENT TASK: +For each episode above, follow the step-by-step process to determine if it should be assigned to this space. + +Remember: +1. Identify the PRIMARY SUBJECT of each episode (who/what is it about?) +2. Check if that PRIMARY SUBJECT matches what this space is about +3. If the episode is ABOUT something else (even if it mentions related keywords), do NOT assign +4. If the episode is a META-discussion about the space's topic (not actual content), do NOT assign +5. Only assign if the episode's primary subject aligns with the space's intent AND confidence >= 0.75 + +Provide your analysis and assignments using the specified JSON format.`, }, ]; } else { - // Daily batch mode - consider all spaces + // Episode mode - consider all spaces const spacesDescription = enhancedSpaces - .filter((space) => space.currentStatementCount >= 5) // Only include spaces with 5+ statements for theme validation .map((space) => { const spaceInfo = [ `- ${space.name} (${space.id})`, - ` Description: ${space.description || "No description"}`, - ` Current Statements: ${space.currentStatementCount}`, + ` Intent/Purpose: ${space.description || "No description"}`, + ` Current Episodes: ${space.currentEpisodeCount}`, ]; if (space.summary) { spaceInfo.push(` Summary: ${space.summary}`); } - if (space.themes && space.themes.length > 0) { - spaceInfo.push(` Themes: ${space.themes.join(", ")}`); - } - return spaceInfo.join("\n"); }) .join("\n\n"); @@ -1004,15 +937,77 @@ Only assign statements with confidence >= 0.85.`, return [ { role: "system", - content: `You are an expert at organizing information and assigning statements to relevant semantic spaces. + content: `You are an expert at organizing episodes into semantic spaces based on the space's intent and purpose. -INSTRUCTIONS: -1. Analyze each statement's semantic meaning and context -2. Assign to the most appropriate space(s) - statements can belong to multiple spaces -3. Only assign if confidence >= 0.85 for quality -4. Consider relationships between subject, predicate, and object -5. Only assign to spaces with established themes (5+ statements) -6. If no spaces are relevant, use empty addSpaceId: [] +CORE PRINCIPLE: Match episodes based on WHAT THE EPISODE IS FUNDAMENTALLY ABOUT (its primary subject), not just keyword overlap. + +STEP-BY-STEP FILTERING PROCESS: + +Step 1: IDENTIFY PRIMARY SUBJECT +Ask: "Who or what is this episode fundamentally about?" +- Is it about a specific person? (by name, or "I"/"my" = speaker) +- Is it about a system, tool, or organization? +- Is it about a project, event, or activity? +- Is it about a concept, topic, or idea? + +Step 2: HANDLE IMPLICIT SUBJECTS +- "I prefer..." or "My..." → Subject is the SPEAKER (check episode source/metadata for identity) +- "User discussed..." or "Person X said..." → Subject is that specific person +- "We decided..." → Subject is the group/team/project being discussed +- If unclear, identify from context clues in the episode content + +Step 3: CHECK SUBJECT ALIGNMENT WITH EACH SPACE +For each available space, does the episode's PRIMARY SUBJECT match what that space is about? +- Match the subject identity (right person/thing/concept?) +- Match the subject relationship (is episode ABOUT the subject or just MENTIONING it?) +- Match the intent purpose (does episode serve the space's purpose?) +- An episode can match multiple spaces if its primary subject serves multiple intents + +Step 4: DISTINGUISH SUBJECT vs META +Ask: "Is this episode ABOUT the subject itself, or ABOUT discussing/analyzing the subject?" +- ABOUT subject: Episode contains actual content related to subject +- META-discussion: Episode discusses how to handle/analyze/organize the subject +- Only assign if episode is ABOUT the subject, not meta-discussion + +Step 5: VERIFY CONFIDENCE +Only assign to a space if confidence >= 0.75 based on: +- Subject identity clarity (is subject clearly identified?) +- Subject alignment strength (how well does it match space intent?) +- Content relevance (does episode content serve space purpose?) + +Step 6: MULTI-SPACE ASSIGNMENT +- An episode can belong to multiple spaces if its primary subject serves multiple intents +- Each space assignment should meet the >= 0.75 confidence threshold independently +- If no spaces match, use empty addSpaceId: [] + +CRITICAL RULE: PRIMARY SUBJECT MATCHING +The episode's PRIMARY SUBJECT must match the space's target subject. +- If space is about Person A, episodes about Person B should NOT match (even if same topic) +- If space is about a specific concept, meta-discussions about that concept should NOT match +- If space is about actual behaviors/facts, process discussions about organizing those facts should NOT match + +EXAMPLES OF CORRECT FILTERING: + +Example 1 - Person Identity: +Space: "Alex's work preferences" +Episode A: "I prefer morning meetings and async updates" (speaker: Alex) → ASSIGN ✅ (primary subject: Alex's preferences) +Episode B: "Jordan prefers afternoon meetings" (speaker: System) → DO NOT ASSIGN ❌ (primary subject: Jordan, not Alex) + +Example 2 - Meta vs Actual: +Space: "Recipe collection" +Episode A: "My lasagna recipe: 3 layers pasta, béchamel, meat sauce..." → ASSIGN ✅ (primary subject: actual recipe) +Episode B: "We should organize recipes by cuisine type" → DO NOT ASSIGN ❌ (primary subject: organizing system, not recipe) + +Example 3 - Keyword Overlap Without Subject Match: +Space: "Home renovation project" +Episode A: "Installed new kitchen cabinets, chose oak wood" → ASSIGN ✅ (primary subject: home renovation) +Episode B: "Friend asked advice about their kitchen renovation" → DO NOT ASSIGN ❌ (primary subject: friend's project, not this home) + +Example 4 - Scope Constraints: +Space: "Personal identity and preferences (broadly useful across contexts, not app-specific)" +Episode A: "I prefer async communication and morning work hours" → ASSIGN ✅ (cross-context preference, broadly applicable) +Episode B: "I format task titles as {verb}: {title} in TaskApp" → DO NOT ASSIGN ❌ (app-specific behavior, fails "not app-specific" constraint) +Episode C: "Demonstrated knowledge of ProjectX technical stack" → DO NOT ASSIGN ❌ (work/project knowledge, not personal identity) RESPONSE FORMAT: Provide your response inside tags with a valid JSON array: @@ -1020,26 +1015,37 @@ Provide your response inside tags with a valid JSON array: [ { - "statementId": "statement-uuid", + "episodeId": "episode-uuid", "addSpaceId": ["space-uuid1", "space-uuid2"], - "confidence": 0.85 + "confidence": 0.75, + "reasoning": "Brief explanation of intent match" } ] -IMPORTANT: If no spaces are relevant, use empty addSpaceId array: [] -Example of non-relevant statement: {"statementId": "stmt-123", "addSpaceId": [], "confidence": 0.0}`, +IMPORTANT: If no spaces' intents align with an episode, use empty addSpaceId array: [] +Example: {"episodeId": "ep-123", "addSpaceId": [], "confidence": 0.0, "reasoning": "No matching space intent"}`, }, { role: "user", - content: `AVAILABLE SPACES (with established themes - 5+ statements): + content: `AVAILABLE SPACES (with their intents/purposes): ${spacesDescription} -STATEMENTS TO ORGANIZE: -${statementsDescription} +EPISODES TO ORGANIZE: +${episodesDescription} -Assign each statement to the most semantically relevant space(s). Consider meaning over keywords. -Only assign to spaces with established themes (5+ statements) and with confidence >= 0.85.`, +ASSIGNMENT TASK: +For each episode above, follow the step-by-step process to determine which space(s) it should be assigned to. + +Remember: +1. Identify the PRIMARY SUBJECT of each episode (who/what is it about?) +2. Check if that PRIMARY SUBJECT matches what each space is about +3. If the episode is ABOUT something else (even if it mentions related keywords), do NOT assign to that space +4. If the episode is a META-discussion about a space's topic (not actual content), do NOT assign to that space +5. An episode can be assigned to multiple spaces if its primary subject serves multiple intents +6. Only assign if the episode's primary subject aligns with the space's intent AND confidence >= 0.75 for that space + +Provide your analysis and assignments using the specified JSON format.`, }, ]; } @@ -1047,7 +1053,7 @@ Only assign to spaces with established themes (5+ statements) and with confidenc function parseLLMResponseWithTags( response: string, - statements: StatementData[], + episodes: EpisodeData[], spaces: Space[], ): AssignmentResult[] { try { @@ -1057,7 +1063,7 @@ function parseLLMResponseWithTags( logger.warn( "No tags found in LLM response, falling back to full response parsing", ); - return parseLLMResponse(response, statements, spaces); + return parseLLMResponse(response, episodes, spaces); } const jsonContent = outputMatch[1].trim(); @@ -1071,14 +1077,14 @@ function parseLLMResponseWithTags( } const validSpaceIds = new Set(spaces.map((s) => s.id)); - const validStatementIds = new Set(statements.map((s) => s.uuid)); + const validEpisodeIds = new Set(episodes.map((e) => e.uuid)); return parsed .filter((assignment: any) => { // Validate assignment structure if ( - !assignment.statementId || - !validStatementIds.has(assignment.statementId) + !assignment.episodeId || + !validEpisodeIds.has(assignment.episodeId) ) { return false; } @@ -1096,9 +1102,10 @@ function parseLLMResponseWithTags( return true; }) .map((assignment: any) => ({ - statementId: assignment.statementId, + episodeId: assignment.episodeId, spaceIds: assignment.addSpaceId, - confidence: assignment.confidence || 0.8, + confidence: assignment.confidence || 0.75, + reasoning: assignment.reasoning, })); } catch (error) { logger.error( @@ -1107,13 +1114,13 @@ function parseLLMResponseWithTags( ); logger.debug("Raw LLM response:", { response } as Record); // Fallback to regular parsing - return parseLLMResponse(response, statements, spaces); + return parseLLMResponse(response, episodes, spaces); } } function parseLLMResponse( response: string, - statements: StatementData[], + episodes: EpisodeData[], spaces: Space[], ): AssignmentResult[] { try { @@ -1131,14 +1138,14 @@ function parseLLMResponse( } const validSpaceIds = new Set(spaces.map((s) => s.id)); - const validStatementIds = new Set(statements.map((s) => s.uuid)); + const validEpisodeIds = new Set(episodes.map((e) => e.uuid)); return parsed.assignments .filter((assignment: any) => { // Validate assignment structure if ( - !assignment.statementId || - !validStatementIds.has(assignment.statementId) + !assignment.episodeId || + !validEpisodeIds.has(assignment.episodeId) ) { return false; } @@ -1155,9 +1162,10 @@ function parseLLMResponse( return true; }) .map((assignment: any) => ({ - statementId: assignment.statementId, + episodeId: assignment.episodeId, spaceIds: assignment.spaceIds, - confidence: assignment.confidence || 0.8, + confidence: assignment.confidence || 0.75, + reasoning: assignment.reasoning, })); } catch (error) { logger.error( diff --git a/apps/webapp/app/trigger/spaces/space-summary.ts b/apps/webapp/app/trigger/spaces/space-summary.ts index 9504bd5..dfb62c1 100644 --- a/apps/webapp/app/trigger/spaces/space-summary.ts +++ b/apps/webapp/app/trigger/spaces/space-summary.ts @@ -10,7 +10,7 @@ import { triggerSpacePattern } from "./space-pattern"; import { getSpace, updateSpace } from "../utils/space-utils"; import { EpisodeType } from "@core/types"; -import { getSpaceStatementCount } from "~/services/graphModels/space"; +import { getSpaceEpisodeCount } from "~/services/graphModels/space"; import { addToQueue } from "../utils/queue"; interface SpaceSummaryPayload { @@ -35,7 +35,7 @@ interface SpaceSummaryData { spaceId: string; spaceName: string; spaceDescription?: string; - statementCount: number; + contextCount: number; summary: string; keyEntities: string[]; themes: string[]; @@ -55,7 +55,7 @@ const SummaryResultSchema = z.object({ const CONFIG = { maxEpisodesForSummary: 20, // Limit episodes for performance minEpisodesForSummary: 1, // Minimum episodes to generate summary - summaryPromptTokenLimit: 4000, // Approximate token limit for prompt + summaryEpisodeThreshold: 10, // Minimum new episodes required to trigger summary (configurable) }; export const spaceSummaryQueue = queue({ @@ -85,7 +85,7 @@ export const spaceSummaryTask = task({ }); // Generate summary for the single space - const summaryResult = await generateSpaceSummary(spaceId, userId); + const summaryResult = await generateSpaceSummary(spaceId, userId, triggerSource); if (summaryResult) { // Store the summary @@ -98,36 +98,24 @@ export const spaceSummaryTask = task({ metadata: { triggerSource, phase: "completed_summary", - statementCount: summaryResult.statementCount, + contextCount: summaryResult.contextCount, confidence: summaryResult.confidence, }, }); logger.info(`Generated summary for space ${spaceId}`, { - statementCount: summaryResult.statementCount, + statementCount: summaryResult.contextCount, confidence: summaryResult.confidence, themes: summaryResult.themes.length, triggerSource, }); - // Ingest summary as document if it exists and continue with patterns - if (!summaryResult.isIncremental && summaryResult.statementCount > 0) { - await processSpaceSummarySequentially({ - userId, - workspaceId, - spaceId, - spaceName: summaryResult.spaceName, - summaryContent: summaryResult.summary, - triggerSource: "summary_complete", - }); - } - return { success: true, spaceId, triggerSource, summary: { - statementCount: summaryResult.statementCount, + statementCount: summaryResult.contextCount, confidence: summaryResult.confidence, themesCount: summaryResult.themes.length, }, @@ -186,6 +174,7 @@ export const spaceSummaryTask = task({ async function generateSpaceSummary( spaceId: string, userId: string, + triggerSource?: "assignment" | "manual" | "scheduled", ): Promise { try { // 1. Get space details @@ -197,6 +186,35 @@ async function generateSpaceSummary( return null; } + // 2. Check episode count threshold (skip for manual triggers) + if (triggerSource !== "manual") { + const currentEpisodeCount = await getSpaceEpisodeCount(spaceId, userId); + const lastSummaryEpisodeCount = space.contextCount || 0; + const episodeDifference = currentEpisodeCount - lastSummaryEpisodeCount; + + if (episodeDifference < CONFIG.summaryEpisodeThreshold) { + logger.info( + `Skipping summary generation for space ${spaceId}: only ${episodeDifference} new episodes (threshold: ${CONFIG.summaryEpisodeThreshold})`, + { + currentEpisodeCount, + lastSummaryEpisodeCount, + episodeDifference, + threshold: CONFIG.summaryEpisodeThreshold, + } + ); + return null; + } + + logger.info( + `Proceeding with summary generation for space ${spaceId}: ${episodeDifference} new episodes (threshold: ${CONFIG.summaryEpisodeThreshold})`, + { + currentEpisodeCount, + lastSummaryEpisodeCount, + episodeDifference, + } + ); + } + // 2. Check for existing summary const existingSummary = await getExistingSummary(spaceId); const isIncremental = existingSummary !== null; @@ -296,14 +314,14 @@ async function generateSpaceSummary( return null; } - // Get the actual current statement count from Neo4j - const currentStatementCount = await getSpaceStatementCount(spaceId, userId); + // Get the actual current counts from Neo4j + const currentEpisodeCount = await getSpaceEpisodeCount(spaceId, userId); return { spaceId: space.uuid, spaceName: space.name, spaceDescription: space.description as string, - statementCount: currentStatementCount, + contextCount: currentEpisodeCount, summary: summaryResult.summary, keyEntities: summaryResult.keyEntities || [], themes: summaryResult.themes, @@ -400,38 +418,48 @@ function createUnifiedSummaryPrompt( return [ { role: "system", - content: `You are an expert at analyzing and summarizing structured knowledge within semantic spaces. Your task is to ${isUpdate ? "update an existing summary by integrating new episodes" : "create a comprehensive summary of episodes"}. + content: `You are an expert at analyzing and summarizing episodes within semantic spaces based on the space's intent and purpose. Your task is to ${isUpdate ? "update an existing summary by integrating new episodes" : "create a comprehensive summary of episodes"}. CRITICAL RULES: 1. Base your summary ONLY on insights derived from the actual content/episodes provided -2. Use the space description only as contextual guidance, never copy or paraphrase it +2. Use the space's INTENT/PURPOSE (from description) to guide what to summarize and how to organize it 3. Write in a factual, neutral tone - avoid promotional language ("pivotal", "invaluable", "cutting-edge") -4. Be specific and concrete - reference actual content, patterns, and themes found in the episodes +4. Be specific and concrete - reference actual content, patterns, and insights found in the episodes 5. If episodes are insufficient for meaningful insights, state that more data is needed +INTENT-DRIVEN SUMMARIZATION: +Your summary should SERVE the space's intended purpose. Examples: +- "Learning React" → Summarize React concepts, patterns, techniques learned +- "Project X Updates" → Summarize progress, decisions, blockers, next steps +- "Health Tracking" → Summarize metrics, trends, observations, insights +- "Guidelines for React" → Extract actionable patterns, best practices, rules +- "Evolution of design thinking" → Track how thinking changed over time, decision points +The intent defines WHY this space exists - organize content to serve that purpose. + INSTRUCTIONS: ${ isUpdate ? `1. Review the existing summary and themes carefully -2. Analyze the new episodes for patterns and insights +2. Analyze the new episodes for patterns and insights that align with the space's intent 3. Identify connecting points between existing knowledge and new episodes 4. Update the summary to seamlessly integrate new information while preserving valuable existing insights -5. Evolve themes by adding new ones or refining existing ones based on connections found -6. Update the markdown summary to reflect the enhanced themes and new insights` +5. Evolve themes by adding new ones or refining existing ones based on the space's purpose +6. Organize the summary to serve the space's intended use case` : `1. Analyze the semantic content and relationships within the episodes -2. Identify the main themes and patterns across all episodes (themes must have at least 3 supporting episodes) -3. Create a coherent summary that captures the essence of this knowledge domain -4. Generate a well-structured markdown summary organized by the identified themes` +2. Identify topics/sections that align with the space's INTENT and PURPOSE +3. Create a coherent summary that serves the space's intended use case +4. Organize the summary based on the space's purpose (not generic frequency-based themes)` } -${isUpdate ? "7" : "6"}. Assess your confidence in the ${isUpdate ? "updated" : ""} summary quality (0.0-1.0) +${isUpdate ? "7" : "5"}. Assess your confidence in the ${isUpdate ? "updated" : ""} summary quality (0.0-1.0) -THEME IDENTIFICATION RULES: -- A theme must be supported by AT LEAST 3 related episodes to be considered valid -- Themes should represent substantial, meaningful patterns rather than minor occurrences -- Each theme must capture a distinct semantic domain or conceptual area -- Only identify themes that have sufficient evidence in the data -- If fewer than 3 episodes support a potential theme, do not include it -- Themes will be used to organize the markdown summary into logical sections +INTENT-ALIGNED ORGANIZATION: +- Organize sections based on what serves the space's purpose +- Topics don't need minimum episode counts - relevance to intent matters most +- Each section should provide value aligned with the space's intended use +- For "guidelines" spaces: focus on actionable patterns +- For "tracking" spaces: focus on temporal patterns and changes +- For "learning" spaces: focus on concepts and insights gained +- Let the space's intent drive the structure, not rigid rules ${ isUpdate @@ -484,7 +512,7 @@ ${ role: "user", content: `SPACE INFORMATION: Name: "${spaceName}" -Description (for context only): ${spaceDescription || "No description provided"} +Intent/Purpose: ${spaceDescription || "No specific intent provided - organize naturally based on content"} ${ isUpdate @@ -508,8 +536,8 @@ ${topEntities.join(", ")}` ${ isUpdate - ? "Please identify connections between the existing summary and new episodes, then update the summary to integrate the new insights coherently. Remember: only summarize insights from the actual episode content, not the space description." - : "Please analyze the episodes and provide a comprehensive summary that captures insights derived from the episode content provided. Use the description only as context. If there are too few episodes to generate meaningful insights, indicate that more data is needed rather than falling back on the description." + ? "Please identify connections between the existing summary and new episodes, then update the summary to integrate the new insights coherently. Organize the summary to SERVE the space's intent/purpose. Remember: only summarize insights from the actual episode content." + : "Please analyze the episodes and provide a comprehensive summary that SERVES the space's intent/purpose. Organize sections based on what would be most valuable for this space's intended use case. If the intent is unclear, organize naturally based on content patterns. Only summarize insights from actual episode content." }`, }, ]; @@ -519,7 +547,7 @@ async function getExistingSummary(spaceId: string): Promise<{ summary: string; themes: string[]; lastUpdated: Date; - statementCount: number; + contextCount: number; } | null> { try { const existingSummary = await getSpace(spaceId); @@ -528,8 +556,8 @@ async function getExistingSummary(spaceId: string): Promise<{ return { summary: existingSummary.summary, themes: existingSummary.themes, - lastUpdated: existingSummary.lastPatternTrigger || new Date(), - statementCount: existingSummary.statementCount || 0, + lastUpdated: existingSummary.summaryGeneratedAt || new Date(), + contextCount: existingSummary.contextCount || 0, }; } @@ -547,24 +575,18 @@ async function getSpaceEpisodes( userId: string, sinceDate?: Date, ): Promise { - // Build query to get distinct episodes that have statements in the space - let whereClause = - "s.spaceIds IS NOT NULL AND $spaceId IN s.spaceIds AND s.invalidAt IS NULL"; + // Query episodes directly using Space-[:HAS_EPISODE]->Episode relationships const params: any = { spaceId, userId }; - // Store the sinceDate condition separately to apply after e is defined let dateCondition = ""; if (sinceDate) { - dateCondition = "e.createdAt > $sinceDate"; + dateCondition = "AND e.createdAt > $sinceDate"; params.sinceDate = sinceDate.toISOString(); } const query = ` - MATCH (s:Statement{userId: $userId}) - WHERE ${whereClause} - OPTIONAL MATCH (e:Episode{userId: $userId})-[:HAS_PROVENANCE]->(s) - WITH e - WHERE e IS NOT NULL ${dateCondition ? `AND ${dateCondition}` : ""} + MATCH (space:Space {uuid: $spaceId, userId: $userId})-[:HAS_EPISODE]->(e:Episode {userId: $userId}) + WHERE e IS NOT NULL ${dateCondition} RETURN DISTINCT e ORDER BY e.createdAt DESC `; @@ -654,7 +676,7 @@ async function storeSummary(summaryData: SpaceSummaryData): Promise { space.keyEntities = $keyEntities, space.themes = $themes, space.summaryConfidence = $confidence, - space.summaryStatementCount = $statementCount, + space.summaryContextCount = $contextCount, space.summaryLastUpdated = datetime($lastUpdated) RETURN space `; @@ -665,7 +687,7 @@ async function storeSummary(summaryData: SpaceSummaryData): Promise { keyEntities: summaryData.keyEntities, themes: summaryData.themes, confidence: summaryData.confidence, - statementCount: summaryData.statementCount, + contextCount: summaryData.contextCount, lastUpdated: summaryData.lastUpdated.toISOString(), }); diff --git a/apps/webapp/app/trigger/utils/space-utils.ts b/apps/webapp/app/trigger/utils/space-utils.ts index 462e3c7..8144c56 100644 --- a/apps/webapp/app/trigger/utils/space-utils.ts +++ b/apps/webapp/app/trigger/utils/space-utils.ts @@ -31,7 +31,7 @@ export const updateSpace = async (summaryData: { spaceId: string; summary: string; themes: string[]; - statementCount: number; + contextCount: number; }) => { return await prisma.space.update({ where: { @@ -40,7 +40,8 @@ export const updateSpace = async (summaryData: { data: { summary: summaryData.summary, themes: summaryData.themes, - statementCount: summaryData.statementCount, + contextCount: summaryData.contextCount, + summaryGeneratedAt: new Date().toISOString() }, }); }; diff --git a/packages/database/prisma/migrations/20251007175613_modify_space_context_count/migration.sql b/packages/database/prisma/migrations/20251007175613_modify_space_context_count/migration.sql new file mode 100644 index 0000000..1edd723 --- /dev/null +++ b/packages/database/prisma/migrations/20251007175613_modify_space_context_count/migration.sql @@ -0,0 +1,12 @@ +/* + Warnings: + + - You are about to drop the column `statementCount` on the `Space` table. All the data in the column will be lost. + - You are about to drop the column `statementCountAtLastTrigger` on the `Space` table. All the data in the column will be lost. + +*/ +-- AlterTable +ALTER TABLE "Space" DROP COLUMN "statementCount", +DROP COLUMN "statementCountAtLastTrigger", +ADD COLUMN "contextCount" INTEGER, +ADD COLUMN "contextCountAtLastTrigger" INTEGER; diff --git a/packages/database/prisma/migrations/20251007181423_add_space_summary_generate_at/migration.sql b/packages/database/prisma/migrations/20251007181423_add_space_summary_generate_at/migration.sql new file mode 100644 index 0000000..2df8ebc --- /dev/null +++ b/packages/database/prisma/migrations/20251007181423_add_space_summary_generate_at/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "Space" ADD COLUMN "summaryGeneratedAt" TIMESTAMP(3); diff --git a/packages/database/prisma/schema.prisma b/packages/database/prisma/schema.prisma index ae4af39..9e90491 100644 --- a/packages/database/prisma/schema.prisma +++ b/packages/database/prisma/schema.prisma @@ -472,20 +472,21 @@ model RecallLog { } model Space { - id String @id @default(cuid()) - name String - description String? - autoMode Boolean @default(false) - summary String? - themes String[] - statementCount Int? + id String @id @default(cuid()) + name String + description String? + autoMode Boolean @default(false) + summary String? + themes String[] + contextCount Int? // Count of context items in this space (episodes, statements, etc.) status String? icon String? - lastPatternTrigger DateTime? - statementCountAtLastTrigger Int? + lastPatternTrigger DateTime? + summaryGeneratedAt DateTime? + contextCountAtLastTrigger Int? // Context count when pattern was last triggered // Relations workspace Workspace @relation(fields: [workspaceId], references: [id]) diff --git a/packages/types/src/space.ts b/packages/types/src/space.ts index 0ffeead..e3dbc5b 100644 --- a/packages/types/src/space.ts +++ b/packages/types/src/space.ts @@ -6,7 +6,7 @@ export interface SpaceNode { createdAt: Date; updatedAt: Date; isActive: boolean; - statementCount?: number; // Computed field + contextCount?: number; // Computed field - count of episodes assigned to this space embedding?: number[]; // For future space similarity }