From bc2deea9984ae164cb6f3f7f8340223113429a92 Mon Sep 17 00:00:00 2001
From: Harshith Mullapudi
Date: Tue, 28 Oct 2025 23:42:34 +0530
Subject: [PATCH] feat: connected space creation

---
 README.md                                     |  51 +-
 apps/webapp/app/bullmq/queues/index.ts        |  45 +-
 apps/webapp/app/bullmq/workers/index.ts       |  67 +-
 .../app/jobs/bert/topic-analysis.logic.ts     | 203 ++-
 .../app/jobs/ingest/ingest-episode.logic.ts   |   2 +
 .../app/jobs/spaces/space-assignment.logic.ts | 1201 +++++++++++++++++
 .../jobs/spaces/space-identification.logic.ts |  229 ++++
 .../app/jobs/spaces/space-summary.logic.ts    | 721 ++++++++++
 apps/webapp/app/lib/queue-adapter.server.ts   |  48 +-
 .../app/routes/api.v1.conversation._index.tsx |   6 +-
 apps/webapp/app/routes/api.v1.logs.tsx        |   1 +
 apps/webapp/app/services/clustering.server.ts |   0
 apps/webapp/app/services/graphModels/space.ts |  37 +
 apps/webapp/app/services/mcp.server.ts        |   1 +
 apps/webapp/app/services/space.server.ts      |  39 +-
 .../webapp/app/trigger/bert/topic-analysis.ts |  32 +-
 .../app/trigger/ingest/retry-no-credits.ts    |   8 +-
 .../app/trigger/spaces/space-assignment.ts    | 1185 +---------------
 .../app/trigger/spaces/space-summary.ts       |  791 +----------
 apps/webapp/app/trigger/utils/space-utils.ts  |  19 +-
 apps/webapp/app/utils/mcp/memory.ts           |  50 +-
 apps/webapp/package.json                      |  11 +-
 apps/webapp/trigger.config.ts                 |   5 +
 docs/self-hosting/docker.mdx                  |   7 +-
 docs/self-hosting/overview.mdx                |   2 +-
 hosting/docker/docker-compose.yaml            |  98 +-
 pnpm-lock.yaml                                | 148 +-
 27 files changed, 2846 insertions(+), 2161 deletions(-)
 create mode 100644 apps/webapp/app/jobs/spaces/space-assignment.logic.ts
 create mode 100644 apps/webapp/app/jobs/spaces/space-identification.logic.ts
 create mode 100644 apps/webapp/app/jobs/spaces/space-summary.logic.ts
 delete mode 100644 apps/webapp/app/services/clustering.server.ts

diff --git a/README.md b/README.md
index aeea5a1..5a194eb 100644
--- a/README.md
+++ b/README.md
@@ -55,7 +55,7 @@ CORE memory achieves **88.24%** average accuracy in Locomo dataset across all re

## Overview

-**Problem** 
+**Problem**

Developers waste time re-explaining context to AI tools. Hit token limits in Claude? Start fresh and lose everything. Switch from ChatGPT/Claude to Cursor? Explain your context again. Your conversations, decisions, and insights vanish between sessions. With every new AI tool, the cost of context switching grows.

@@ -64,6 +64,7 @@ Developers waste time re-explaining context to AI tools. Hit token limits in Cla

CORE is an open-source unified, persistent memory layer for all your AI tools. Your context follows you from Cursor to Claude to ChatGPT to Claude Code. One knowledge graph remembers who said what, when, and why. Connect once, remember everywhere. Stop managing context and start building.

## 🚀 CORE Self-Hosting
+
Want to run CORE on your own infrastructure? Self-hosting gives you complete control over your data and deployment.

**Quick Deploy Options:**
@@ -80,15 +81,20 @@ Want to run CORE on your own infrastructure? Self-hosting gives you complete con

### Setup

1. Clone the repository:
+
```
git clone https://github.com/RedPlanetHQ/core.git
cd core
```
+
2. Configure environment variables in `core/.env`:
+
```
OPENAI_API_KEY=your_openai_api_key
```
+
3. Start the service
+
```
docker-compose up -d
```

@@ -100,6 +106,7 @@ Once deployed, you can configure your AI providers (OpenAI, Anthropic) and start building your memory graph.

Note: We tried open-source models like Ollama and GPT OSS, but fact generation was not good enough. We are still figuring out how to improve that, and will then also support OSS models.

## 🚀 CORE Cloud
+
**Build your unified memory graph in 5 minutes:**

Don't want to manage infrastructure? CORE Cloud lets you build your personal memory system instantly - no setup, no servers, just memory that works.

@@ -115,24 +122,24 @@ Don't want to manage infrastructure? CORE Cloud lets you build your personal mem

## 🧩 Key Features

-### 🧠 **Unified, Portable Memory**: 
+### 🧠 **Unified, Portable Memory**:
+
Add and recall your memory across **Cursor, Windsurf, Claude Desktop, Claude Code, Gemini CLI, AWS's Kiro, VS Code, and Roo Code** via MCP

![core-claude](https://github.com/user-attachments/assets/56c98288-ee87-4cd0-8b02-860aca1c7f9a)

-
-### 🕸️ **Temporal + Reified Knowledge Graph**: 
+### 🕸️ **Temporal + Reified Knowledge Graph**:

Remember the story behind every fact—track who said what, when, and why with rich relationships and full provenance, not just flat storage

![core-memory-graph](https://github.com/user-attachments/assets/5d1ee659-d519-4624-85d1-e0497cbdd60a)

-
-### 🌐 **Browser Extension**: 
+### 🌐 **Browser Extension**:

Save conversations and content from ChatGPT, Grok, Gemini, Twitter, YouTube, blog posts, and any webpage directly into your CORE memory.

**How to Use the Extension**
+
1. [Download the Extension](https://chromewebstore.google.com/detail/core-extension/cglndoindnhdbfcbijikibfjoholdjcc) from the Chrome Web Store.
2. Log in to the [CORE dashboard](https://core.heysol.ai)
   - Navigate to Settings (bottom left)
@@ -141,13 +148,12 @@ Save conversations and content from ChatGPT, Grok, Gemini, Twitter, YouTube, blo

https://github.com/user-attachments/assets/6e629834-1b9d-4fe6-ae58-a9068986036a

+### 💬 **Chat with Memory**:

-### 💬 **Chat with Memory**: 
Ask questions like "What are my writing preferences?" with instant insights from your connected knowledge

![chat-with-memory](https://github.com/user-attachments/assets/d798802f-bd51-4daf-b2b5-46de7d206f66)

-
### ⚡ **Auto-Sync from Apps**:

Automatically capture relevant context from Linear, Slack, Notion, GitHub and other connected apps into your CORE memory

@@ -156,16 +162,12 @@ Automatically capture relevant context from Linear, Slack, Notion, GitHub and ot

![core-slack](https://github.com/user-attachments/assets/d5fefe38-221e-4076-8a44-8ed673960f03)

-
-### 🔗 **MCP Integration Hub**: 
+### 🔗 **MCP Integration Hub**:

Connect Linear, Slack, GitHub, Notion once to CORE—then use all their tools in Claude, Cursor, or any MCP client with a single URL

![core-linear-claude](https://github.com/user-attachments/assets/7d59d92b-8c56-4745-a7ab-9a3c0341aa32)

## How CORE creates memory

memory-ingest-diagram

@@ -179,7 +181,6 @@ CORE's ingestion pipeline has four phases designed to capture evolving context

The Result: Instead of a flat database, CORE gives you a memory that grows and changes with you - preserving context, evolution, and ownership so agents can actually use it.
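+
+A rough sketch of what one reified, temporal fact can look like as a data shape. This is illustrative only - the field names are assumptions drawn from the episode fields used elsewhere in this patch (`source`, `sessionId`, `validAt`, `createdAt`), not CORE's actual schema:
+
+```ts
+// Illustrative sketch only - not CORE's real schema.
+// A reified statement keeps the fact plus who said it, when, and where it came from.
+interface ReifiedStatement {
+  subject: string;      // e.g. "user"
+  predicate: string;    // e.g. "prefers"
+  object: string;       // e.g. "async communication"
+  source: string;       // app or conversation that produced the fact
+  sessionId?: string;   // originating session, if any
+  validAt: Date;        // when the fact became true
+  createdAt: Date;      // when CORE learned it
+  provenance: string[]; // episode UUIDs backing this statement
+}
+```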
-
![memory-ingest-eg](https://github.com/user-attachments/assets/1d0a8007-153a-4842-9586-f6f4de43e647)

## How CORE recalls from memory

@@ -204,7 +205,7 @@ Explore our documentation to get the most out of CORE
- [Connect Core MCP with Claude](https://docs.heysol.ai/providers/claude)
- [Connect Core MCP with Cursor](https://docs.heysol.ai/providers/cursor)
- [Connect Core MCP with Claude Code](https://docs.heysol.ai/providers/claude-code)
-- [Connect Core MCP with Codex](https://docs.heysol.ai/providers/codex) 
+- [Connect Core MCP with Codex](https://docs.heysol.ai/providers/codex)
- [Basic Concepts](https://docs.heysol.ai/overview)
- [API Reference](https://docs.heysol.ai/api-reference/get-user-profile)

@@ -249,21 +250,11 @@ Have questions or feedback? We're here to help:

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

diff --git a/apps/webapp/app/bullmq/queues/index.ts b/apps/webapp/app/bullmq/queues/index.ts
index 7356c96..fb627af 100644
--- a/apps/webapp/app/bullmq/queues/index.ts
+++ b/apps/webapp/app/bullmq/queues/index.ts
@@ -105,7 +105,6 @@ export const bertTopicQueue = new Queue("bert-topic-queue", {
      type: "exponential",
      delay: 5000,
    },
-    timeout: 300000, // 5 minute timeout
    removeOnComplete: {
      age: 7200, // Keep completed jobs for 2 hours
      count: 100,
@@ -115,3 +114,47 @@ export const bertTopicQueue = new Queue("bert-topic-queue", {
    },
  },
});
+
+/**
+ * Space assignment queue
+ * Handles assigning episodes to spaces based on semantic matching
+ */
+export const spaceAssignmentQueue = new Queue("space-assignment-queue", {
+  connection: getRedisConnection(),
+  defaultJobOptions: {
+    attempts: 3,
+    backoff: {
+      type: "exponential",
+      delay: 2000,
+    },
+    removeOnComplete: {
+      age: 3600,
+      count: 1000,
+    },
+    removeOnFail: {
+      age: 86400,
+    },
+  },
+});
+
+/**
+ * Space summary queue
+ * Handles generating summaries for spaces
+ */
+export const spaceSummaryQueue = new Queue("space-summary-queue", {
+  connection: getRedisConnection(),
+  defaultJobOptions: {
+    attempts: 3,
+    backoff: {
+      type: "exponential",
+      delay: 2000,
+    },
+    removeOnComplete: {
+      age: 3600,
+      count: 1000,
+    },
+    removeOnFail: {
+      age: 86400,
+    },
+  },
+});
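For reviewers new to BullMQ: with the `defaultJobOptions` above, a failed job is retried up to 3 times with exponential backoff starting at 2s (roughly 2s, 4s, 8s), completed jobs are pruned after an hour (keeping at most 1000), and failures are kept for a day. A minimal sketch of enqueueing against these defaults - the queue name comes from this patch, the payload mirrors the `SpaceAssignmentPayload` interface introduced later in this diff, and the IDs are made up; the import path assumes the `~/` alias used elsewhere in these files:

```ts
import { spaceAssignmentQueue } from "~/bullmq/queues";

// The defaultJobOptions above (retries, backoff, pruning) apply automatically.
await spaceAssignmentQueue.add("space-assignment", {
  userId: "user-123",      // hypothetical IDs, for illustration only
  workspaceId: "ws-456",
  mode: "episode" as const,
  episodeIds: ["ep-789"],
});
```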
diff --git a/apps/webapp/app/bullmq/workers/index.ts b/apps/webapp/app/bullmq/workers/index.ts
index ca8eb96..3b58656 100644
--- a/apps/webapp/app/bullmq/workers/index.ts
+++ b/apps/webapp/app/bullmq/workers/index.ts
@@ -18,7 +18,6 @@ import {
  processConversationTitleCreation,
  type CreateConversationTitlePayload,
} from "~/jobs/conversation/create-title.logic";
-
import {
  processSessionCompaction,
  type SessionCompactionPayload,
@@ -27,20 +26,31 @@ import {
  processTopicAnalysis,
  type TopicAnalysisPayload,
} from "~/jobs/bert/topic-analysis.logic";
+
import {
  enqueueIngestEpisode,
  enqueueSpaceAssignment,
  enqueueSessionCompaction,
  enqueueBertTopicAnalysis,
+  enqueueSpaceSummary,
} from "~/lib/queue-adapter.server";
import { logger } from "~/services/logger.service";
+import {
+  processSpaceAssignment,
+  type SpaceAssignmentPayload,
+} from "~/jobs/spaces/space-assignment.logic";
+import {
+  processSpaceSummary,
+  type SpaceSummaryPayload,
+} from "~/jobs/spaces/space-summary.logic";

/**
 * Episode ingestion worker
- * Processes individual episode ingestion jobs with per-user concurrency
+ * Processes individual episode ingestion jobs with global concurrency
 *
- * Note: Per-user concurrency is achieved by using userId as part of the jobId
- * when adding jobs to the queue, ensuring only one job per user runs at a time
+ * Note: BullMQ uses a global concurrency limit (see the worker's `concurrency` option).
+ * Trigger.dev uses per-user concurrency via concurrencyKey.
+ * For most open-source deployments, global concurrency is sufficient.
 */
export const ingestWorker = new Worker(
  "ingest-queue",
  async (job) => {
@@ -57,7 +67,7 @@ export const ingestWorker = new Worker(
  },
  {
    connection: getRedisConnection(),
-    concurrency: 5, // Process up to 5 jobs in parallel
+    concurrency: 1, // Global limit: process one job at a time
  },
);

@@ -122,7 +132,11 @@ export const bertTopicWorker = new Worker(
  "bert-topic-queue",
  async (job) => {
    const payload = job.data as TopicAnalysisPayload;
-    return await processTopicAnalysis(payload);
+    return await processTopicAnalysis(
+      payload,
+      // Callback to enqueue space summary
+      enqueueSpaceSummary,
+    );
  },
  {
    connection: getRedisConnection(),
@@ -130,6 +144,45 @@ export const bertTopicWorker = new Worker(
  },
);

+/**
+ * Space assignment worker
+ * Handles assigning episodes to spaces based on semantic matching
+ *
+ * Note: Global concurrency of 1 ensures sequential processing.
+ * Trigger.dev uses per-user concurrency via concurrencyKey.
+ */
+export const spaceAssignmentWorker = new Worker(
+  "space-assignment-queue",
+  async (job) => {
+    const payload = job.data as SpaceAssignmentPayload;
+    return await processSpaceAssignment(
+      payload,
+      // Callback to enqueue space summary
+      enqueueSpaceSummary,
+    );
+  },
+  {
+    connection: getRedisConnection(),
+    concurrency: 1, // Global limit: process one job at a time
+  },
+);
+
+/**
+ * Space summary worker
+ * Handles generating summaries for spaces
+ */
+export const spaceSummaryWorker = new Worker(
+  "space-summary-queue",
+  async (job) => {
+    const payload = job.data as SpaceSummaryPayload;
+    return await processSpaceSummary(payload);
+  },
+  {
+    connection: getRedisConnection(),
+    concurrency: 1, // Process one space summary at a time
+  },
+);
+
/**
 * Graceful shutdown handler
 */
@@ -140,6 +193,8 @@ export async function closeAllWorkers(): Promise<void> {
    conversationTitleWorker.close(),
    sessionCompactionWorker.close(),
    bertTopicWorker.close(),
+    spaceSummaryWorker.close(),
+    spaceAssignmentWorker.close(),
  ]);
  logger.log("All BullMQ workers closed");
}
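The removed comment above described per-user serialization via `jobId`. If a self-hosted deployment needs that behavior back without Trigger.dev, one option - a sketch, not part of this patch - is to derive the job id from the user, since BullMQ skips `add()` when a job with the same `jobId` already exists:

```ts
import { Queue } from "bullmq";

// Sketch: serialize ingestion per user with a deterministic jobId.
// While a job with this id is still waiting or active, a second add()
// for the same user becomes a no-op instead of running concurrently.
async function enqueueIngestForUser(
  queue: Queue,
  userId: string,
  payload: Record<string, unknown>,
) {
  await queue.add("ingest-episode", payload, {
    jobId: `ingest-${userId}`, // hypothetical scheme; adjust to your dedup needs
  });
}
```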
diff --git a/apps/webapp/app/jobs/bert/topic-analysis.logic.ts b/apps/webapp/app/jobs/bert/topic-analysis.logic.ts
index 43900b5..e1b2bb0 100644
--- a/apps/webapp/app/jobs/bert/topic-analysis.logic.ts
+++ b/apps/webapp/app/jobs/bert/topic-analysis.logic.ts
@@ -1,10 +1,16 @@
import { exec } from "child_process";
import { promisify } from "util";
+import { identifySpacesForTopics } from "~/jobs/spaces/space-identification.logic";
+import { assignEpisodesToSpace } from "~/services/graphModels/space";
+import { logger } from "~/services/logger.service";
+import { SpaceService } from "~/services/space.server";
+import { prisma } from "~/trigger/utils/prisma";

const execAsync = promisify(exec);

export interface TopicAnalysisPayload {
  userId: string;
+  workspaceId: string;
  minTopicSize?: number;
  nrTopics?: number;
}
@@ -19,24 +25,13 @@ export interface TopicAnalysisResult {
}

/**
- * Process BERT topic analysis on user's episodes
- * This is the common logic shared between Trigger.dev and BullMQ
- *
- * NOTE: This function does NOT update workspace.metadata.lastTopicAnalysisAt
- * That should be done by the caller BEFORE enqueueing this job to prevent
- * duplicate analyses from racing conditions.
+ * Run BERT analysis using exec (for BullMQ/Docker)
 */
-export async function processTopicAnalysis(
-  payload: TopicAnalysisPayload
-): Promise<TopicAnalysisResult> {
-  const { userId, minTopicSize = 10, nrTopics } = payload;
-
-  console.log(`[BERT Topic Analysis] Starting analysis for user: ${userId}`);
-  console.log(
-    `[BERT Topic Analysis] Parameters: minTopicSize=${minTopicSize}, nrTopics=${nrTopics || "auto"}`
-  );
-
-  // Build the command
+async function runBertWithExec(
+  userId: string,
+  minTopicSize: number,
+  nrTopics?: number,
+): Promise<string> {
  let command = `python3 /core/apps/webapp/app/bert/main.py ${userId} --json`;

  if (minTopicSize) {
@@ -49,22 +44,55 @@
  console.log(`[BERT Topic Analysis] Executing: ${command}`);

+  const { stdout, stderr } = await execAsync(command, {
+    timeout: 300000, // 5 minutes
+    maxBuffer: 10 * 1024 * 1024, // 10MB buffer for large outputs
+  });
+
+  if (stderr) {
+    console.warn(`[BERT Topic Analysis] Warnings:`, stderr);
+  }
+
+  return stdout;
+}
+
+/**
+ * Process BERT topic analysis on user's episodes
+ * This is the common logic shared between Trigger.dev and BullMQ
+ *
+ * NOTE: This function does NOT update workspace.metadata.lastTopicAnalysisAt
+ * That should be done by the caller BEFORE enqueueing this job to prevent
+ * duplicate analyses from race conditions.
+ */
+export async function processTopicAnalysis(
+  payload: TopicAnalysisPayload,
+  enqueueSpaceSummary?: (params: {
+    spaceId: string;
+    userId: string;
+  }) => Promise<void>,
+  pythonRunner?: (
+    userId: string,
+    minTopicSize: number,
+    nrTopics?: number,
+  ) => Promise<string>,
+): Promise<TopicAnalysisResult> {
+  const { userId, workspaceId, minTopicSize = 10, nrTopics } = payload;
+
+  console.log(`[BERT Topic Analysis] Starting analysis for user: ${userId}`);
+  console.log(
+    `[BERT Topic Analysis] Parameters: minTopicSize=${minTopicSize}, nrTopics=${nrTopics || "auto"}`,
+  );
+
  try {
    const startTime = Date.now();

-    // Execute the Python script with a 5-minute timeout
-    const { stdout, stderr } = await execAsync(command, {
-      timeout: 300000, // 5 minutes
-      maxBuffer: 10 * 1024 * 1024, // 10MB buffer for large outputs
-    });
+    // Run BERT analysis using provided runner or default exec
+    const runner = pythonRunner || runBertWithExec;
+    const stdout = await runner(userId, minTopicSize, nrTopics);

    const duration = Date.now() - startTime;
    console.log(`[BERT Topic Analysis] Completed in ${duration}ms`);

-    if (stderr) {
-      console.warn(`[BERT Topic Analysis] Warnings:`, stderr);
-    }
-
    // Parse the JSON output
    const result: TopicAnalysisResult = JSON.parse(stdout);

@@ -72,13 +100,124 @@
    const topicCount = Object.keys(result.topics).length;
    const totalEpisodes = Object.values(result.topics).reduce(
      (sum, topic) => sum + topic.episodeIds.length,
-      0
+      0,
    );

    console.log(
-      `[BERT Topic Analysis] Found ${topicCount} topics covering ${totalEpisodes} episodes`
+      `[BERT Topic Analysis] Found ${topicCount} topics covering ${totalEpisodes} episodes`,
    );

+    // Step 2: Identify spaces for topics using LLM
+    try {
+      logger.info("[BERT Topic Analysis] Starting space identification", {
+        userId,
+        topicCount,
+      });
+
+      const spaceProposals = await identifySpacesForTopics({
+        userId,
+        topics: result.topics,
+      });
+
+      logger.info("[BERT Topic Analysis] Space identification completed", {
+        userId,
+        proposalCount: spaceProposals.length,
+      });
+
+      // Step 3: Create or find spaces and assign episodes
+      // Get existing spaces from PostgreSQL
+      const existingSpacesFromDb = 
await prisma.space.findMany({ + where: { workspaceId }, + }); + const existingSpacesByName = new Map( + existingSpacesFromDb.map((s) => [s.name.toLowerCase(), s]), + ); + + for (const proposal of spaceProposals) { + try { + // Check if space already exists (case-insensitive match) + let spaceId: string; + const existingSpace = existingSpacesByName.get( + proposal.name.toLowerCase(), + ); + + if (existingSpace) { + // Use existing space + spaceId = existingSpace.id; + logger.info("[BERT Topic Analysis] Using existing space", { + spaceName: proposal.name, + spaceId, + }); + } else { + // Create new space (creates in both PostgreSQL and Neo4j) + // Skip automatic space assignment since we're manually assigning from BERT topics + const spaceService = new SpaceService(); + const newSpace = await spaceService.createSpace( + { + name: proposal.name, + description: proposal.intent, + userId, + workspaceId, + }, + { skipAutoAssignment: true }, + ); + spaceId = newSpace.id; + logger.info("[BERT Topic Analysis] Created new space", { + spaceName: proposal.name, + spaceId, + intent: proposal.intent, + }); + } + + // Collect all episode IDs from the topics in this proposal + const episodeIds: string[] = []; + for (const topicId of proposal.topics) { + const topic = result.topics[topicId]; + if (topic) { + episodeIds.push(...topic.episodeIds); + } + } + + // Assign all episodes from these topics to the space + if (episodeIds.length > 0) { + await assignEpisodesToSpace(episodeIds, spaceId, userId); + logger.info("[BERT Topic Analysis] Assigned episodes to space", { + spaceName: proposal.name, + spaceId, + episodeCount: episodeIds.length, + topics: proposal.topics, + }); + + // Step 4: Trigger space summary if callback provided + if (enqueueSpaceSummary) { + await enqueueSpaceSummary({ spaceId, userId }); + logger.info("[BERT Topic Analysis] Triggered space summary", { + spaceName: proposal.name, + spaceId, + }); + } + } + } catch (spaceError) { + logger.error( + "[BERT Topic Analysis] Failed to process space proposal", + { + proposal, + error: spaceError, + }, + ); + // Continue with other proposals + } + } + } catch (spaceIdentificationError) { + logger.error( + "[BERT Topic Analysis] Space identification failed, returning topics only", + { + error: spaceIdentificationError, + }, + ); + // Return topics even if space identification fails + } + return result; } catch (error) { console.error(`[BERT Topic Analysis] Error:`, error); @@ -87,21 +226,19 @@ export async function processTopicAnalysis( // Check for timeout if (error.message.includes("ETIMEDOUT")) { throw new Error( - `Topic analysis timed out after 5 minutes. User may have too many episodes.` + `Topic analysis timed out after 5 minutes. User may have too many episodes.`, ); } // Check for Python errors if (error.message.includes("python3: not found")) { - throw new Error( - `Python 3 is not installed or not available in PATH.` - ); + throw new Error(`Python 3 is not installed or not available in PATH.`); } // Check for Neo4j connection errors if (error.message.includes("Failed to connect to Neo4j")) { throw new Error( - `Could not connect to Neo4j. Check NEO4J_URI and credentials.` + `Could not connect to Neo4j. 
Check NEO4J_URI and credentials.`,
      );
    }

diff --git a/apps/webapp/app/jobs/ingest/ingest-episode.logic.ts b/apps/webapp/app/jobs/ingest/ingest-episode.logic.ts
index 57af32b..5ddc8ae 100644
--- a/apps/webapp/app/jobs/ingest/ingest-episode.logic.ts
+++ b/apps/webapp/app/jobs/ingest/ingest-episode.logic.ts
@@ -61,6 +61,7 @@ export async function processEpisodeIngestion(
  }) => Promise<void>,
  enqueueBertTopicAnalysis?: (params: {
    userId: string;
+    workspaceId: string;
    minTopicSize?: number;
    nrTopics?: number;
  }) => Promise<void>,
@@ -281,6 +282,7 @@ export async function processEpisodeIngestion(

      await enqueueBertTopicAnalysis({
        userId: payload.userId,
+        workspaceId: payload.workspaceId,
        minTopicSize: 10,
      });

diff --git a/apps/webapp/app/jobs/spaces/space-assignment.logic.ts b/apps/webapp/app/jobs/spaces/space-assignment.logic.ts
new file mode 100644
index 0000000..5dbbf81
--- /dev/null
+++ b/apps/webapp/app/jobs/spaces/space-assignment.logic.ts
@@ -0,0 +1,1201 @@
+import { z } from "zod";
+import { logger } from "~/services/logger.service";
+import { SpaceService } from "~/services/space.server";
+import { makeModelCall } from "~/lib/model.server";
+import { createBatch, getBatch } from "~/lib/batch.server";
+import { runQuery } from "~/lib/neo4j.server";
+import {
+  assignEpisodesToSpace,
+  getSpaceEpisodeCount,
+} from "~/services/graphModels/space";
+import {
+  updateMultipleSpaceStatuses,
+  SPACE_STATUS,
+} from "~/trigger/utils/space-status";
+import type { CoreMessage } from "ai";
+import { type Space } from "@prisma/client";
+
+export interface SpaceAssignmentPayload {
+  userId: string;
+  workspaceId: string;
+  mode: "new_space" | "episode";
+  newSpaceId?: string; // For new_space mode
+  episodeIds?: string[]; // For episode mode
+  batchSize?: number; // Processing batch size
+}
+
+interface EpisodeData {
+  uuid: string;
+  content: string;
+  originalContent: string;
+  source: string;
+  createdAt: Date;
+  metadata: any;
+}
+
+interface AssignmentResult {
+  episodeId: string;
+  spaceIds: string[];
+  confidence: number;
+  reasoning?: string;
+}
+
+const CONFIG = {
+  newSpaceMode: {
+    batchSize: 20,
+    confidenceThreshold: 0.75, // Intent-based threshold for new space creation
+    useBatchAPI: true, // Use batch API for new space mode
+    minEpisodesForBatch: 5, // Minimum episodes to use batch API
+  },
+  episodeMode: {
+    batchSize: 20,
+    confidenceThreshold: 0.75, // Intent-based threshold for episode assignment
+    useBatchAPI: true, // Use batch API for episode mode
+    minEpisodesForBatch: 5, // Minimum episodes to use batch API
+  },
+};
+
+// Zod schema for LLM response validation
+const AssignmentResultSchema = z.array(
+  z.object({
+    episodeId: z.string(),
+    addSpaceId: z.array(z.string()),
+    confidence: z.number(),
+    reasoning: z.string(),
+  }),
+);
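+
+// Illustrative example (not part of the runtime flow): the prompts below ask the
+// model for exactly this shape, which AssignmentResultSchema can then validate:
+//   AssignmentResultSchema.parse([
+//     { episodeId: "ep-123", addSpaceId: ["space-abc"],
+//       confidence: 0.82, reasoning: "Primary subject matches space intent" },
+//   ]);
+// (Values are made up.) Entries below the confidence threshold are filtered later.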
+
+export interface SpaceAssignmentResult {
+  success: boolean;
+  mode: string;
+  processed: number;
+  assignments: number;
+  batches?: number;
+  spacesAvailable: number;
+  affectedSpaces: number;
+  summaryTriggered: boolean;
+  patternCheckTriggered: boolean;
+}
+
+/**
+ * Core business logic for space assignment
+ * This is shared between Trigger.dev and BullMQ implementations
+ */
+export async function processSpaceAssignment(
+  payload: SpaceAssignmentPayload,
+  // Callback function for triggering space summary
+  enqueueSpaceSummary?: (params: {
+    userId: string;
+    workspaceId: string;
+    spaceId: string;
+    triggerSource: "assignment" | "manual" | "scheduled";
+  }) => Promise<void>,
+): Promise<SpaceAssignmentResult> {
+  const {
+    userId,
+    workspaceId,
+    mode,
+    newSpaceId,
+    episodeIds,
+    batchSize = mode === "new_space"
+      ? CONFIG.newSpaceMode.batchSize
+      : CONFIG.episodeMode.batchSize,
+  } = payload;
+
+  logger.info(`Starting space assignment`, {
+    userId,
+    mode,
+    newSpaceId,
+    episodeIds,
+    batchSize,
+  });
+
+  const spaceService = new SpaceService();
+
+  try {
+    // 1. Get user's spaces
+    const spaces = await spaceService.getUserSpaces(userId);
+
+    if (spaces.length === 0) {
+      logger.info(`No spaces found for user ${userId}, skipping assignment`);
+      return {
+        success: true,
+        mode,
+        processed: 0,
+        assignments: 0,
+        spacesAvailable: 0,
+        affectedSpaces: 0,
+        summaryTriggered: false,
+        patternCheckTriggered: false,
+      };
+    }
+
+    // 2. Get episodes to analyze based on mode
+    const episodes = await getEpisodesToAnalyze(userId, mode, {
+      newSpaceId,
+      episodeIds,
+    });
+
+    if (episodes.length === 0) {
+      logger.info(
+        `No episodes to analyze for user ${userId} in ${mode} mode`,
+      );
+      return {
+        success: true,
+        mode,
+        processed: 0,
+        assignments: 0,
+        spacesAvailable: spaces.length,
+        affectedSpaces: 0,
+        summaryTriggered: false,
+        patternCheckTriggered: false,
+      };
+    }
+
+    // 3. Process episodes using batch AI or fall back to sequential
+    const config =
+      mode === "new_space" ? CONFIG.newSpaceMode : CONFIG.episodeMode;
+    const shouldUseBatchAPI = true; // Always takes the batch path; CONFIG.minEpisodesForBatch is currently unused
+
+    let totalProcessed = 0;
+    let totalAssignments = 0;
+    let totalBatches = 0;
+    const affectedSpaces = new Set(); // Track spaces that received new episodes
+
+    if (shouldUseBatchAPI) {
+      logger.info(
+        `Using Batch AI processing for ${episodes.length} episodes`,
+        {
+          mode,
+          userId,
+          batchSize,
+        },
+      );
+
+      const batchResult = await processBatchAI(
+        episodes,
+        spaces,
+        userId,
+        mode,
+        newSpaceId,
+        batchSize,
+      );
+      totalProcessed = batchResult.processed;
+      totalAssignments = batchResult.assignments;
+      batchResult.affectedSpaces?.forEach((spaceId) =>
+        affectedSpaces.add(spaceId),
+      );
+    } else {
+      logger.info(
+        `Using sequential processing for ${episodes.length} episodes (below batch threshold)`,
+        {
+          mode,
+          userId,
+          minRequired: config.minEpisodesForBatch,
+        },
+      );
+
+      // Fallback to sequential processing for smaller episode sets
+      totalBatches = Math.ceil(episodes.length / batchSize);
+
+      for (let i = 0; i < totalBatches; i++) {
+        const batch = episodes.slice(i * batchSize, (i + 1) * batchSize);
+
+        logger.info(
+          `Processing batch ${i + 1}/${totalBatches} with ${batch.length} episodes`,
+          {
+            mode,
+            userId,
+          },
+        );
+
+        const batchResult = await processBatch(
+          batch,
+          spaces,
+          userId,
+          mode,
+          newSpaceId,
+        );
+        totalProcessed += batchResult.processed;
+        totalAssignments += batchResult.assignments;
+        batchResult.affectedSpaces?.forEach((spaceId) =>
+          affectedSpaces.add(spaceId),
+        );
+
+        // Add delay between batches to avoid rate limiting
+        if (i < totalBatches - 1) {
+          await new Promise((resolve) => setTimeout(resolve, 1000));
+        }
+      }
+    }
+
+    logger.info(`Completed LLM space assignment`, {
+      userId,
+      mode,
+      totalProcessed,
+      totalAssignments,
+      spacesAvailable: spaces.length,
+      affectedSpaces: affectedSpaces.size,
+    });
+
+    // 4. 
Update space status to "processing" for affected spaces
+    if (affectedSpaces.size > 0) {
+      try {
+        await updateMultipleSpaceStatuses(
+          Array.from(affectedSpaces),
+          SPACE_STATUS.PROCESSING,
+          {
+            userId,
+            operation: "space-assignment",
+            metadata: { mode, phase: "start_processing" },
+          },
+        );
+      } catch (statusError) {
+        logger.warn(`Failed to update space statuses to processing:`, {
+          error: statusError,
+          userId,
+          mode,
+        });
+      }
+    }
+
+    // 5. Trigger space summaries for affected spaces (fan-out pattern)
+    if (affectedSpaces.size > 0 && enqueueSpaceSummary) {
+      try {
+        logger.info(
+          `Triggering space summaries for ${affectedSpaces.size} affected spaces in parallel`,
+        );
+
+        // Fan out to multiple parallel triggers
+        const summaryPromises = Array.from(affectedSpaces).map((spaceId) =>
+          enqueueSpaceSummary({
+            userId,
+            workspaceId,
+            spaceId,
+            triggerSource: "assignment",
+          }).catch((error) => {
+            logger.warn(`Failed to trigger summary for space ${spaceId}:`, {
+              error,
+            });
+            return { success: false, spaceId, error: error.message };
+          }),
+        );
+
+        const summaryResults = await Promise.allSettled(summaryPromises);
+        const successful = summaryResults.filter(
+          (r) => r.status === "fulfilled",
+        ).length;
+        const failed = summaryResults.filter(
+          (r) => r.status === "rejected",
+        ).length;
+
+        logger.info(`Space summary triggers completed`, {
+          userId,
+          mode,
+          totalSpaces: affectedSpaces.size,
+          successful,
+          failed,
+        });
+      } catch (summaryError) {
+        // Don't fail the assignment if summary generation fails
+        logger.warn(`Failed to trigger space summaries after assignment:`, {
+          error: summaryError,
+          userId,
+          mode,
+          affectedSpaces: Array.from(affectedSpaces),
+        });
+      }
+    }
+
+    // 6. Update space status to "ready" after all processing is complete
+    if (affectedSpaces.size > 0) {
+      try {
+        await updateMultipleSpaceStatuses(
+          Array.from(affectedSpaces),
+          SPACE_STATUS.READY,
+          {
+            userId,
+            operation: "space-assignment",
+            metadata: { mode, phase: "completed_processing" },
+          },
+        );
+      } catch (finalStatusError) {
+        logger.warn(`Failed to update space statuses to ready:`, {
+          error: finalStatusError,
+          userId,
+          mode,
+        });
+      }
+    }
+
+    return {
+      success: true,
+      mode,
+      processed: totalProcessed,
+      assignments: totalAssignments,
+      batches: totalBatches,
+      spacesAvailable: spaces.length,
+      affectedSpaces: affectedSpaces.size,
+      summaryTriggered: affectedSpaces.size > 0,
+      patternCheckTriggered: affectedSpaces.size > 0,
+    };
+  } catch (error) {
+    logger.error(
+      `Error in LLM space assignment for user ${userId}:`,
+      error as Record<string, unknown>,
+    );
+    throw error;
+  }
+}
+
+async function getEpisodesToAnalyze(
+  userId: string,
+  mode: "new_space" | "episode",
+  options: { newSpaceId?: string; episodeIds?: string[] },
+): Promise<EpisodeData[]> {
+  let query: string;
+  let params: any = { userId };
+
+  if (mode === "new_space") {
+    // For new space: analyze all recent episodes
+    query = `
+      MATCH (e:Episode {userId: $userId})
+      RETURN e
+      ORDER BY e.createdAt DESC
+      LIMIT 1000
+    `;
+  } else {
+    // For episode mode: analyze specific episodes
+    query = `
+      UNWIND $episodeIds AS episodeId
+      MATCH (e:Episode {uuid: episodeId, userId: $userId})
+      RETURN e
+      ORDER BY e.createdAt DESC
+    `;
+    params.episodeIds = options.episodeIds;
+  }
+
+  const result = await runQuery(query, params);
+
+  return result.map((record) => {
+    const episode = record.get("e").properties;
+    return {
+      uuid: episode.uuid,
+      content: episode.content,
+      originalContent: episode.originalContent,
+      source: episode.source,
+      createdAt: new 
Date(episode.createdAt),
+      metadata: JSON.parse(episode.metadata || "{}"),
+    };
+  });
+}
+
+async function processBatchAI(
+  episodes: EpisodeData[],
+  spaces: Space[],
+  userId: string,
+  mode: "new_space" | "episode",
+  newSpaceId?: string,
+  batchSize: number = 50,
+): Promise<{
+  processed: number;
+  assignments: number;
+  affectedSpaces?: string[];
+}> {
+  try {
+    // Create batches of episodes
+    const episodeBatches: EpisodeData[][] = [];
+    for (let i = 0; i < episodes.length; i += batchSize) {
+      episodeBatches.push(episodes.slice(i, i + batchSize));
+    }
+
+    logger.info(
+      `Creating ${episodeBatches.length} batch AI requests for ${episodes.length} episodes`,
+    );
+
+    // Create batch requests with prompts
+    const batchRequests = await Promise.all(
+      episodeBatches.map(async (batch, index) => {
+        const promptMessages = await createLLMPrompt(
+          batch,
+          spaces,
+          mode,
+          newSpaceId,
+          userId,
+        );
+        const systemPrompt =
+          promptMessages.find((m) => m.role === "system")?.content || "";
+        const userPrompt =
+          promptMessages.find((m) => m.role === "user")?.content || "";
+
+        return {
+          customId: `episode-space-assignment-${userId}-${mode}-${index}`,
+          messages: [{ role: "user" as const, content: userPrompt }],
+          systemPrompt,
+        };
+      }),
+    );
+
+    // Submit batch to AI provider
+    const { batchId } = await createBatch({
+      requests: batchRequests,
+      outputSchema: AssignmentResultSchema,
+      maxRetries: 3,
+      timeoutMs: 1200000, // 20-minute timeout
+    });
+
+    logger.info(`Batch AI job created: ${batchId}`, {
+      userId,
+      mode,
+      batchRequests: batchRequests.length,
+    });
+
+    // Poll for completion with improved handling
+    const maxPollingTime = 1200000; // 20 minutes
+    const pollInterval = 5000; // 5 seconds
+    const startTime = Date.now();
+
+    let batch = await getBatch({ batchId });
+
+    while (batch.status === "processing" || batch.status === "pending") {
+      const elapsed = Date.now() - startTime;
+
+      if (elapsed > maxPollingTime) {
+        logger.warn(
+          `Batch AI job timed out after ${elapsed}ms, processing partial results`,
+          {
+            batchId,
+            status: batch.status,
+            completed: batch.completedRequests,
+            total: batch.totalRequests,
+            failed: batch.failedRequests,
+          },
+        );
+        break; // Exit loop to process any available results
+      }
+
+      logger.info(`Batch AI job status: ${batch.status}`, {
+        batchId,
+        completed: batch.completedRequests,
+        total: batch.totalRequests,
+        failed: batch.failedRequests,
+        elapsed: elapsed,
+      });
+
+      await new Promise((resolve) => setTimeout(resolve, pollInterval));
+      batch = await getBatch({ batchId });
+    }
+
+    // Handle different completion scenarios
+    if (batch.status === "failed") {
+      logger.error(`Batch AI job failed completely`, {
+        batchId,
+        status: batch.status,
+      });
+      throw new Error(`Batch AI job failed with status: ${batch.status}`);
+    }
+
+    // Log final status regardless of completion state
+    logger.info(`Batch AI job processing finished`, {
+      batchId,
+      status: batch.status,
+      completed: batch.completedRequests,
+      total: batch.totalRequests,
+      failed: batch.failedRequests,
+    });
+
+    if (!batch.results || batch.results.length === 0) {
+      logger.warn(`No results returned from batch AI job ${batchId}`, {
+        status: batch.status,
+        completed: batch.completedRequests,
+        failed: batch.failedRequests,
+      });
+
+      // If we have no results but some requests failed, fall back to sequential processing
+      if (batch.failedRequests && batch.failedRequests > 0) {
+        logger.info(
+          `Falling back to sequential processing due to batch failures`,
+        );
+        return await processBatch(episodes, 
spaces, userId, mode, newSpaceId); + } + + return { processed: episodes.length, assignments: 0 }; + } + + logger.info(`Processing batch results`, { + batchId, + status: batch.status, + resultsCount: batch.results.length, + totalRequests: batch.totalRequests, + completedRequests: batch.completedRequests, + failedRequests: batch.failedRequests, + }); + + // Process all batch results + let totalAssignments = 0; + const affectedSpaces = new Set(); + const confidenceThreshold = + mode === "new_space" + ? CONFIG.newSpaceMode.confidenceThreshold + : CONFIG.episodeMode.confidenceThreshold; + + for (const result of batch.results) { + if (result.error) { + logger.warn(`Batch AI request ${result.customId} failed:`, { + error: result.error, + }); + continue; + } + + if (!result.response) { + logger.warn(`No response from batch AI request ${result.customId}`); + continue; + } + + // Parse assignments from this batch result + let assignments: AssignmentResult[] = []; + try { + // Extract episode batch info from customId + const batchIndexMatch = result.customId.match(/-(\d+)$/); + const batchIndex = batchIndexMatch ? parseInt(batchIndexMatch[1]) : 0; + const episodeBatch = episodeBatches[batchIndex]; + + if (Array.isArray(result.response)) { + // Handle direct array response (from structured output) + assignments = result.response.map((a) => ({ + episodeId: a.episodeId, + spaceIds: a.addSpaceId || [], + confidence: a.confidence || 0.75, + reasoning: a.reasoning, + })); + } else if (typeof result.response === "string") { + // Parse from text response with tags (fallback for non-structured output) + assignments = parseLLMResponseWithTags( + result.response, + episodeBatch, + spaces, + ); + } else if (typeof result.response === "object" && result.response) { + // Handle object response that might contain the array directly + try { + let responseData = result.response; + if (responseData.results && Array.isArray(responseData.results)) { + responseData = responseData.results; + } + + if (Array.isArray(responseData)) { + assignments = responseData.map((a) => ({ + episodeId: a.episodeId, + spaceIds: a.addSpaceId || [], + confidence: a.confidence || 0.75, + reasoning: a.reasoning, + })); + } else { + // Fallback parsing + assignments = parseLLMResponse( + JSON.stringify(result.response), + episodeBatch, + spaces, + ); + } + } catch (parseError) { + logger.error( + `Error processing object response ${result.customId}:`, + { error: parseError }, + ); + assignments = []; + } + } else { + // Fallback parsing + assignments = parseLLMResponse( + JSON.stringify(result.response), + episodeBatch, + spaces, + ); + } + } catch (parseError) { + logger.error(`Error parsing batch result ${result.customId}:`, { + error: parseError, + }); + continue; + } + + // Group episodes by space for batch assignment + const spaceToEpisodes = new Map(); + + for (const assignment of assignments) { + if ( + assignment.spaceIds.length > 0 && + assignment.confidence >= confidenceThreshold + ) { + for (const spaceId of assignment.spaceIds) { + if (!spaceToEpisodes.has(spaceId)) { + spaceToEpisodes.set(spaceId, []); + } + spaceToEpisodes.get(spaceId)!.push(assignment.episodeId); + } + } + } + + // Apply batch assignments - one call per space + for (const [spaceId, episodeIds] of spaceToEpisodes) { + try { + const assignmentResult = await assignEpisodesToSpace( + episodeIds, + spaceId, + userId, + ); + + if (assignmentResult.success) { + totalAssignments += episodeIds.length; + affectedSpaces.add(spaceId); + logger.info( + `Batch AI assigned 
${episodeIds.length} episodes to space ${spaceId}`,
+            {
+              episodeIds,
+              mode,
+              batchId: result.customId,
+            },
+          );
+        }
+      } catch (error) {
+        logger.warn(
+          `Failed to assign ${episodeIds.length} episodes to space ${spaceId}:`,
+          { error, episodeIds },
+        );
+      }
+    }
+  }
+
+  // Log final batch processing results
+  logger.info(`Batch AI processing completed`, {
+    batchId,
+    totalEpisodes: episodes.length,
+    processedBatches: batch.results.length,
+    totalAssignments,
+    affectedSpaces: affectedSpaces.size,
+    completedRequests: batch.completedRequests,
+    failedRequests: batch.failedRequests || 0,
+  });
+
+  // If we have significant failures, consider fallback processing for remaining episodes
+  const failureRate = batch.failedRequests
+    ? batch.failedRequests / batch.totalRequests
+    : 0;
+  if (failureRate > 0.5) {
+    // If more than 50% failed
+    logger.warn(
+      `High failure rate (${Math.round(failureRate * 100)}%) in batch processing, consider reviewing prompts or input quality`,
+    );
+  }
+
+  return {
+    processed: episodes.length,
+    assignments: totalAssignments,
+    affectedSpaces: Array.from(affectedSpaces),
+  };
+  } catch (error) {
+    logger.error("Error in Batch AI processing:", { error });
+    throw error;
+  }
+}
+
+async function processBatch(
+  episodes: EpisodeData[],
+  spaces: Space[],
+  userId: string,
+  mode: "new_space" | "episode",
+  newSpaceId?: string,
+): Promise<{
+  processed: number;
+  assignments: number;
+  affectedSpaces?: string[];
+}> {
+  try {
+    // Create the LLM prompt based on mode
+    const prompt = await createLLMPrompt(
+      episodes,
+      spaces,
+      mode,
+      newSpaceId,
+      userId,
+    );
+
+    // Episode-intent matching uses the high-complexity model (semantic analysis with intent alignment)
+    let responseText = "";
+    await makeModelCall(
+      false,
+      prompt,
+      (text: string) => {
+        responseText = text;
+      },
+      undefined,
+      "high",
+    );
+
+    // Parse LLM response
+    const assignments = parseLLMResponseWithTags(
+      responseText,
+      episodes,
+      spaces,
+    );
+
+    // Apply assignments
+    let totalAssignments = 0;
+    const affectedSpaces = new Set();
+    const confidenceThreshold =
+      mode === "new_space"
+        ? CONFIG.newSpaceMode.confidenceThreshold
+        : CONFIG.episodeMode.confidenceThreshold;
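+    // Note: both modes currently share the same 0.75 threshold (see CONFIG above),
+    // so this ternary is a no-op today; it is kept so the two modes can be tuned
+    // independently later.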
+
+    for (const assignment of assignments) {
+      if (
+        assignment.spaceIds.length > 0 &&
+        assignment.confidence >= confidenceThreshold
+      ) {
+        // Assign to each space individually to track metadata properly
+        for (const spaceId of assignment.spaceIds) {
+          try {
+            const result = await assignEpisodesToSpace(
+              [assignment.episodeId],
+              spaceId,
+              userId,
+            );
+
+            if (result.success) {
+              totalAssignments++;
+              affectedSpaces.add(spaceId);
+
+              logger.info(
+                `LLM assigned episode ${assignment.episodeId} to space ${spaceId}`,
+                {
+                  confidence: assignment.confidence,
+                  reasoning: assignment.reasoning || "No reasoning",
+                  mode,
+                } as Record<string, unknown>,
+              );
+            }
+          } catch (error) {
+            logger.warn(
+              `Failed to assign episode ${assignment.episodeId} to space ${spaceId}:`,
+              error as Record<string, unknown>,
+            );
+          }
+        }
+      }
+    }
+
+    return {
+      processed: episodes.length,
+      assignments: totalAssignments,
+      affectedSpaces: Array.from(affectedSpaces),
+    };
+  } catch (error) {
+    logger.error("Error processing batch:", error as Record<string, unknown>);
+    return { processed: 0, assignments: 0, affectedSpaces: [] };
+  }
+}
+
+async function createLLMPrompt(
+  episodes: EpisodeData[],
+  spaces: Space[],
+  mode: "new_space" | "episode",
+  newSpaceId?: string,
+  userId?: string,
+): Promise<CoreMessage[]> {
+  const episodesDescription = episodes
+    .map(
+      (ep) =>
+        `ID: ${ep.uuid}\nCONTENT: ${ep.content}\nSOURCE: ${ep.source}\nMETADATA: ${JSON.stringify(ep.metadata)}`,
+    )
+    .join("\n\n");
+
+  // Get enhanced space information with episode counts
+  const enhancedSpaces = await Promise.all(
+    spaces.map(async (space) => {
+      const currentCount = userId
+        ? await getSpaceEpisodeCount(space.id, userId)
+        : 0;
+      return {
+        ...space,
+        currentEpisodeCount: currentCount,
+      };
+    }),
+  );
+
+  if (mode === "new_space" && newSpaceId) {
+    // Focus on the new space for assignment
+    const newSpace = enhancedSpaces.find((s) => s.id === newSpaceId);
+    if (!newSpace) {
+      throw new Error(`New space ${newSpaceId} not found`);
+    }
+
+    return [
+      {
+        role: "system",
+        content: `You are analyzing episodes for assignment to a newly created space based on the space's intent and purpose.
+
+CORE PRINCIPLE: Match episodes based on WHAT THE EPISODE IS FUNDAMENTALLY ABOUT (its primary subject), not just keyword overlap.
+
+STEP-BY-STEP FILTERING PROCESS:
+
+Step 1: IDENTIFY PRIMARY SUBJECT
+Ask: "Who or what is this episode fundamentally about?"
+- Is it about a specific person? (by name, or "I"/"my" = speaker)
+- Is it about a system, tool, or organization?
+- Is it about a project, event, or activity?
+- Is it about a concept, topic, or idea?
+
+Step 2: HANDLE IMPLICIT SUBJECTS
+- "I prefer..." or "My..." → Subject is the SPEAKER (check episode source/metadata for identity)
+- "User discussed..." or "Person X said..." → Subject is that specific person
+- "We decided..." → Subject is the group/team/project being discussed
+- If unclear, identify from context clues in the episode content
+
+Step 3: CHECK SUBJECT ALIGNMENT
+Does the PRIMARY SUBJECT match what the space is about?
+- Match the subject identity (right person/thing/concept?)
+- Match the subject relationship (is episode ABOUT the subject or just MENTIONING it?)
+- Match the intent purpose (does episode serve the space's purpose?)
+- Check scope constraints: If space description includes scope requirements (e.g., "cross-context", "not app-specific", "broadly useful", "stable for 3+ months"), verify episode meets those constraints
+
+Step 4: DISTINGUISH SUBJECT vs META
+Ask: "Is this episode ABOUT the subject itself, or ABOUT discussing/analyzing the subject?"
+- ABOUT subject: Episode contains actual content related to subject
+- META-discussion: Episode discusses how to handle/analyze/organize the subject
+- Only assign if episode is ABOUT the subject, not meta-discussion
+
+Step 5: VERIFY CONFIDENCE
+Only assign if confidence >= 0.75 based on:
+- Subject identity clarity (is subject clearly identified?)
+- Subject alignment strength (how well does it match space intent?)
+- Content relevance (does episode content serve space purpose?)
+
+CRITICAL RULE: PRIMARY SUBJECT MATCHING
+The episode's PRIMARY SUBJECT must match the space's target subject.
+- If space is about Person A, episodes about Person B should NOT match (even if same topic)
+- If space is about a specific concept, meta-discussions about that concept should NOT match
+- If space is about actual behaviors/facts, process discussions about organizing those facts should NOT match
+
+EXAMPLES OF CORRECT FILTERING:
+
+Example 1 - Person Identity:
+Space: "Alex's work preferences"
+Episode A: "I prefer morning meetings and async updates" (speaker: Alex) → ASSIGN ✅ (primary subject: Alex's preferences)
+Episode B: "Jordan prefers afternoon meetings" (speaker: System) → DO NOT ASSIGN ❌ (primary subject: Jordan, not Alex)
+
+Example 2 - Meta vs Actual:
+Space: "Recipe collection"
+Episode A: "My lasagna recipe: 3 layers pasta, béchamel, meat sauce..." → ASSIGN ✅ (primary subject: actual recipe)
+Episode B: "We should organize recipes by cuisine type" → DO NOT ASSIGN ❌ (primary subject: organizing system, not recipe)
+
+Example 3 - Keyword Overlap Without Subject Match:
+Space: "Home renovation project"
+Episode A: "Installed new kitchen cabinets, chose oak wood" → ASSIGN ✅ (primary subject: home renovation)
+Episode B: "Friend asked advice about their kitchen renovation" → DO NOT ASSIGN ❌ (primary subject: friend's project, not this home)
+
+Example 4 - Scope Constraints:
+Space: "Personal identity and preferences (broadly useful across contexts, not app-specific)"
+Episode A: "I prefer async communication and morning work hours" → ASSIGN ✅ (cross-context preference, broadly applicable)
+Episode B: "Demonstrated knowledge of ProjectX technical stack" → DO NOT ASSIGN ❌ (work/project knowledge, not personal identity)
+
+RESPONSE FORMAT:
+Provide your response inside <output> tags with a valid JSON array:
+
+<output>
+[
+  {
+    "episodeId": "episode-uuid",
+    "addSpaceId": ["${newSpaceId}"],
+    "confidence": 0.75,
+    "reasoning": "Brief explanation of intent match"
+  }
+]
+</output>
+
+IMPORTANT: If an episode doesn't align with the space's intent, use empty addSpaceId array: []
+Example: {"episodeId": "ep-123", "addSpaceId": [], "confidence": 0.0, "reasoning": "No intent alignment"}`,
+      },
+      {
+        role: "user",
+        content: `NEW SPACE TO POPULATE:
+Name: ${newSpace.name}
+Intent/Purpose: ${newSpace.description || "No description"}
+Current Episodes: ${newSpace.currentEpisodeCount}
+
+EPISODES TO EVALUATE:
+${episodesDescription}
+
+ASSIGNMENT TASK:
+For each episode above, follow the step-by-step process to determine if it should be assigned to this space.
+
+Remember:
+1. Identify the PRIMARY SUBJECT of each episode (who/what is it about?)
+2. Check if that PRIMARY SUBJECT matches what this space is about
+3. If the episode is ABOUT something else (even if it mentions related keywords), do NOT assign
+4. If the episode is a META-discussion about the space's topic (not actual content), do NOT assign
+5. Only assign if the episode's primary subject aligns with the space's intent AND confidence >= 0.75
+
+Provide your analysis and assignments using the specified JSON format.`,
+      },
+    ];
+  } else {
+    // Episode mode - consider all spaces
+    const spacesDescription = enhancedSpaces
+      .map((space) => {
+        const spaceInfo = [
+          `- ${space.name} (${space.id})`,
+          `  Intent/Purpose: ${space.description || "No description"}`,
+          `  Current Episodes: ${space.currentEpisodeCount}`,
+        ];
+
+        if (space.summary) {
+          spaceInfo.push(`  Summary: ${space.summary}`);
+        }
+
+        return spaceInfo.join("\n");
+      })
+      .join("\n\n");
+
+    return [
+      {
+        role: "system",
+        content: `You are an expert at organizing episodes into semantic spaces based on the space's intent and purpose.
+
+CORE PRINCIPLE: Match episodes based on WHAT THE EPISODE IS FUNDAMENTALLY ABOUT (its primary subject), not just keyword overlap.
+
+STEP-BY-STEP FILTERING PROCESS:
+
+Step 1: IDENTIFY PRIMARY SUBJECT
+Ask: "Who or what is this episode fundamentally about?"
+- Is it about a specific person? (by name, or "I"/"my" = speaker)
+- Is it about a system, tool, or organization?
+- Is it about a project, event, or activity?
+- Is it about a concept, topic, or idea?
+
+Step 2: HANDLE IMPLICIT SUBJECTS
+- "I prefer..." or "My..." → Subject is the SPEAKER (check episode source/metadata for identity)
+- "User discussed..." or "Person X said..." → Subject is that specific person
+- "We decided..." → Subject is the group/team/project being discussed
+- If unclear, identify from context clues in the episode content
+
+Step 3: CHECK SUBJECT ALIGNMENT WITH EACH SPACE
+For each available space, does the episode's PRIMARY SUBJECT match what that space is about?
+- Match the subject identity (right person/thing/concept?)
+- Match the subject relationship (is episode ABOUT the subject or just MENTIONING it?)
+- Match the intent purpose (does episode serve the space's purpose?)
+- An episode can match multiple spaces if its primary subject serves multiple intents
+
+Step 4: DISTINGUISH SUBJECT vs META
+Ask: "Is this episode ABOUT the subject itself, or ABOUT discussing/analyzing the subject?"
+- ABOUT subject: Episode contains actual content related to subject
+- META-discussion: Episode discusses how to handle/analyze/organize the subject
+- Only assign if episode is ABOUT the subject, not meta-discussion
+
+Step 5: VERIFY CONFIDENCE
+Only assign to a space if confidence >= 0.75 based on:
+- Subject identity clarity (is subject clearly identified?)
+- Subject alignment strength (how well does it match space intent?)
+- Content relevance (does episode content serve space purpose?)
+
+Step 6: MULTI-SPACE ASSIGNMENT
+- An episode can belong to multiple spaces if its primary subject serves multiple intents
+- Each space assignment should meet the >= 0.75 confidence threshold independently
+- If no spaces match, use empty addSpaceId: []
+
+CRITICAL RULE: PRIMARY SUBJECT MATCHING
+The episode's PRIMARY SUBJECT must match the space's target subject.
+- If space is about Person A, episodes about Person B should NOT match (even if same topic)
+- If space is about a specific concept, meta-discussions about that concept should NOT match
+- If space is about actual behaviors/facts, process discussions about organizing those facts should NOT match
+
+EXAMPLES OF CORRECT FILTERING:
+
+Example 1 - Person Identity:
+Space: "Alex's work preferences"
+Episode A: "I prefer morning meetings and async updates" (speaker: Alex) → ASSIGN ✅ (primary subject: Alex's preferences)
+Episode B: "Jordan prefers afternoon meetings" (speaker: System) → DO NOT ASSIGN ❌ (primary subject: Jordan, not Alex)
+
+Example 2 - Meta vs Actual:
+Space: "Recipe collection"
+Episode A: "My lasagna recipe: 3 layers pasta, béchamel, meat sauce..." → ASSIGN ✅ (primary subject: actual recipe)
+Episode B: "We should organize recipes by cuisine type" → DO NOT ASSIGN ❌ (primary subject: organizing system, not recipe)
+
+Example 3 - Keyword Overlap Without Subject Match:
+Space: "Home renovation project"
+Episode A: "Installed new kitchen cabinets, chose oak wood" → ASSIGN ✅ (primary subject: home renovation)
+Episode B: "Friend asked advice about their kitchen renovation" → DO NOT ASSIGN ❌ (primary subject: friend's project, not this home)
+
+Example 4 - Scope Constraints:
+Space: "Personal identity and preferences (broadly useful across contexts, not app-specific)"
+Episode A: "I prefer async communication and morning work hours" → ASSIGN ✅ (cross-context preference, broadly applicable)
+Episode B: "I format task titles as {verb}: {title} in TaskApp" → DO NOT ASSIGN ❌ (app-specific behavior, fails "not app-specific" constraint)
+Episode C: "Demonstrated knowledge of ProjectX technical stack" → DO NOT ASSIGN ❌ (work/project knowledge, not personal identity)
+
+RESPONSE FORMAT:
+Provide your response inside <output> tags with a valid JSON array:
+
+<output>
+[
+  {
+    "episodeId": "episode-uuid",
+    "addSpaceId": ["space-uuid1", "space-uuid2"],
+    "confidence": 0.75,
+    "reasoning": "Brief explanation of intent match"
+  }
+]
+</output>
+
+IMPORTANT: If no spaces' intents align with an episode, use empty addSpaceId array: []
+Example: {"episodeId": "ep-123", "addSpaceId": [], "confidence": 0.0, "reasoning": "No matching space intent"}`,
+      },
+      {
+        role: "user",
+        content: `AVAILABLE SPACES (with their intents/purposes):
+${spacesDescription}
+
+EPISODES TO ORGANIZE:
+${episodesDescription}
+
+ASSIGNMENT TASK:
+For each episode above, follow the step-by-step process to determine which space(s) it should be assigned to.
+
+Remember:
+1. Identify the PRIMARY SUBJECT of each episode (who/what is it about?)
+2. Check if that PRIMARY SUBJECT matches what each space is about
+3. If the episode is ABOUT something else (even if it mentions related keywords), do NOT assign to that space
+4. If the episode is a META-discussion about a space's topic (not actual content), do NOT assign to that space
+5. An episode can be assigned to multiple spaces if its primary subject serves multiple intents
+6. Only assign if the episode's primary subject aligns with the space's intent AND confidence >= 0.75 for that space
+
+Provide your analysis and assignments using the specified JSON format.`,
+      },
+    ];
+  }
+}
+
+function parseLLMResponseWithTags(
+  response: string,
+  episodes: EpisodeData[],
+  spaces: Space[],
+): AssignmentResult[] {
+  try {
+    // Extract content from <output> tags
+    const outputMatch = response.match(/<output>([\s\S]*?)<\/output>/);
+    if (!outputMatch) {
+      logger.warn(
+        "No <output> tags found in LLM response, falling back to full response parsing",
+      );
+      return parseLLMResponse(response, episodes, spaces);
+    }
+
+    const jsonContent = outputMatch[1].trim();
+    const parsed = JSON.parse(jsonContent);
+
+    if (!Array.isArray(parsed)) {
+      logger.warn(
+        "Invalid LLM response format - expected array in <output> tags",
+      );
+      return [];
+    }
+
+    const validSpaceIds = new Set(spaces.map((s) => s.id));
+    const validEpisodeIds = new Set(episodes.map((e) => e.uuid));
+
+    return parsed
+      .filter((assignment: any) => {
+        // Validate assignment structure
+        if (
+          !assignment.episodeId ||
+          !validEpisodeIds.has(assignment.episodeId)
+        ) {
+          return false;
+        }
+
+        // Validate spaceIds array
+        if (!assignment.addSpaceId || !Array.isArray(assignment.addSpaceId)) {
+          assignment.addSpaceId = [];
+        }
+
+        // Filter out invalid space IDs
+        assignment.addSpaceId = assignment.addSpaceId.filter(
+          (spaceId: string) => validSpaceIds.has(spaceId),
+        );
+
+        return true;
+      })
+      .map((assignment: any) => ({
+        episodeId: assignment.episodeId,
+        spaceIds: assignment.addSpaceId,
+        confidence: assignment.confidence || 0.75,
+        reasoning: assignment.reasoning,
+      }));
+  } catch (error) {
+    logger.error(
+      "Error parsing LLM response with tags:",
+      error as Record<string, unknown>,
+    );
+    logger.debug("Raw LLM response:", { response } as Record<string, unknown>);
+    // Fallback to regular parsing
+    return parseLLMResponse(response, episodes, spaces);
+  }
+}
+
+function parseLLMResponse(
+  response: string,
+  episodes: EpisodeData[],
+  spaces: Space[],
+): AssignmentResult[] {
+  try {
+    // Clean the response - remove any markdown formatting
+    const cleanedResponse = response
+      .replace(/```json\n?/g, "")
+      .replace(/```\n?/g, "")
+      .trim();
+
+    const parsed = JSON.parse(cleanedResponse);
+
+    if (!parsed.assignments || !Array.isArray(parsed.assignments)) {
+      logger.warn("Invalid LLM response format - no assignments array");
+      return [];
+    }
+
+    const validSpaceIds = new Set(spaces.map((s) => s.id));
+    const validEpisodeIds = new Set(episodes.map((e) => e.uuid));
+
+    return parsed.assignments
+      .filter((assignment: any) => {
+        // Validate assignment structure
+        if (
+          !assignment.episodeId ||
+          !validEpisodeIds.has(assignment.episodeId)
+        ) {
+          return false;
+        }
+
+        if (!assignment.spaceIds || !Array.isArray(assignment.spaceIds)) {
+          return false;
+        }
+
+        // Filter out invalid space IDs
+        assignment.spaceIds = assignment.spaceIds.filter((spaceId: string) =>
+          validSpaceIds.has(spaceId),
+        );
+
+        return true;
+      })
+      .map((assignment: any) => ({
+        episodeId: assignment.episodeId,
+        spaceIds: assignment.spaceIds,
+        confidence: assignment.confidence || 0.75,
+        reasoning: assignment.reasoning,
+      }));
+  } catch (error) {
+    logger.error(
+      "Error parsing LLM response:",
+      error as Record<string, unknown>,
+    );
+    logger.debug("Raw LLM response:", { response } as Record<string, unknown>);
+    return [];
+  }
+}
diff --git a/apps/webapp/app/jobs/spaces/space-identification.logic.ts b/apps/webapp/app/jobs/spaces/space-identification.logic.ts
new file mode 100644
index 0000000..55cfb42
--- /dev/null
+++ 
b/apps/webapp/app/jobs/spaces/space-identification.logic.ts
@@ -0,0 +1,229 @@
+/**
+ * Space Identification Logic
+ *
+ * Uses LLM to identify appropriate spaces for topics discovered by BERT analysis
+ */
+
+import { makeModelCall } from "~/lib/model.server";
+import { getAllSpacesForUser } from "~/services/graphModels/space";
+import { getEpisode } from "~/services/graphModels/episode";
+import { logger } from "~/services/logger.service";
+import type { SpaceNode } from "@core/types";
+
+export interface TopicData {
+  keywords: string[];
+  episodeIds: string[];
+}
+
+export interface SpaceProposal {
+  name: string;
+  intent: string;
+  confidence: number;
+  reason: string;
+  topics: string[]; // Array of topic IDs
+}
+
+interface IdentifySpacesParams {
+  userId: string;
+  topics: Record<string, TopicData>;
+}
+
+/**
+ * Identify spaces for topics using LLM analysis
+ * Takes top 10 keywords and top 5 episodes per topic
+ */
+export async function identifySpacesForTopics(
+  params: IdentifySpacesParams,
+): Promise<SpaceProposal[]> {
+  const { userId, topics } = params;
+
+  // Get existing spaces for the user
+  const existingSpaces = await getAllSpacesForUser(userId);
+
+  // Prepare topic data with top 10 keywords and top 5 episodes
+  const topicsForAnalysis = await Promise.all(
+    Object.entries(topics).map(async ([topicId, topicData]) => {
+      // Take top 10 keywords
+      const topKeywords = topicData.keywords.slice(0, 10);
+
+      // Take top 5 episodes and fetch their content
+      const topEpisodeIds = topicData.episodeIds.slice(0, 5);
+      const episodes = await Promise.all(
+        topEpisodeIds.map((id) => getEpisode(id)),
+      );
+
+      return {
+        topicId,
+        keywords: topKeywords,
+        episodes: episodes
+          .filter((e) => e !== null)
+          .map((e) => ({
+            content: e!.content.substring(0, 500), // Limit to 500 chars per episode
+          })),
+        episodeCount: topicData.episodeIds.length,
+      };
+    }),
+  );
+
+  // Build the prompt
+  const prompt = buildSpaceIdentificationPrompt(
+    existingSpaces,
+    topicsForAnalysis,
+  );
+
+  logger.info("Identifying spaces for topics", {
+    userId,
+    topicCount: Object.keys(topics).length,
+    existingSpaceCount: existingSpaces.length,
+  });
+
+  // Call LLM with structured output
+  let responseText = "";
+  await makeModelCall(
+    false, // not streaming
+    [{ role: "user", content: prompt }],
+    (text) => {
+      responseText = text;
+    },
+    {
+      temperature: 0.7,
+    },
+    "high", // Use high complexity for space identification
+  );
+
+  // Parse the response
+  const proposals = parseSpaceProposals(responseText);
+
+  logger.info("Space identification completed", {
+    userId,
+    proposalCount: proposals.length,
+  });
+
+  return proposals;
+}
+
+/**
+ * Build the prompt for space identification
+ */
+function buildSpaceIdentificationPrompt(
+  existingSpaces: SpaceNode[],
+  topics: Array<{
+    topicId: string;
+    keywords: string[];
+    episodes: Array<{ content: string }>;
+    episodeCount: number;
+  }>,
+): string {
+  const existingSpacesSection =
+    existingSpaces.length > 0
+      ? `## Existing Spaces
+
+The user currently has these spaces:
+${existingSpaces.map((s) => `- **${s.name}**: ${s.description || "No description"} (${s.contextCount || 0} episodes)`).join("\n")}
+
+When identifying new spaces, consider if topics fit into existing spaces or if new spaces are needed.`
+      : `## Existing Spaces
+
+The user currently has no spaces defined. This is a fresh start for space organization.`;
+
+  const topicsSection = `## Topics Discovered
+
+BERT topic modeling has identified ${topics.length} distinct topics from the user's episodes. 
Each topic represents a cluster of semantically related content. + +${topics + .map( + (t, idx) => `### Topic ${idx + 1} (ID: ${t.topicId}) +**Episode Count**: ${t.episodeCount} +**Top Keywords**: ${t.keywords.join(", ")} + +**Sample Episodes** (showing ${t.episodes.length} of ${t.episodeCount}): +${t.episodes.map((e, i) => `${i + 1}. ${e.content}`).join("\n")} +`, + ) + .join("\n")}`; + + return `You are a knowledge organization expert. Your task is to analyze discovered topics and identify appropriate "spaces" (thematic containers) for organizing episodic memories. + +${existingSpacesSection} + +${topicsSection} + +## Task + +Analyze the topics above and identify spaces that would help organize this content meaningfully. For each space: + +1. **Consider existing spaces first**: If topics clearly belong to existing spaces, assign them there +2. **Create new spaces when needed**: If topics represent distinct themes not covered by existing spaces +3. **Group related topics**: Multiple topics can be assigned to the same space if they share a theme +4. **Aim for 20-50 episodes per space**: This is the sweet spot for space cohesion +5. **Focus on user intent**: What would help the user find and understand this content later? + +## Output Format + +Return your analysis as a JSON array of space proposals. Each proposal should have: + +\`\`\`json +[ + { + "name": "Space name (use existing space name if assigning to existing space)", + "intent": "Clear description of what this space represents", + "confidence": 0.85, + "reason": "Brief explanation of why these topics belong together", + "topics": ["topic-id-1", "topic-id-2"] + } +] +\`\`\` + +**Important Guidelines**: +- **confidence**: 0.0-1.0 scale indicating how confident you are this is a good grouping +- **topics**: Array of topic IDs (use the exact IDs from above like "0", "1", "-1", etc.) +- **name**: For existing spaces, use the EXACT name. For new spaces, create a clear, concise name +- Only propose spaces with confidence >= 0.6 +- Each topic should only appear in ONE space proposal +- Topic "-1" is the outlier topic (noise) - only include if it genuinely fits a theme + +Return ONLY the JSON array, no additional text.`; +} + +/** + * Parse space proposals from LLM response + */ +function parseSpaceProposals(responseText: string): SpaceProposal[] { + try { + // Extract JSON from markdown code blocks if present + const jsonMatch = responseText.match(/```(?:json)?\s*(\[[\s\S]*?\])\s*```/); + const jsonText = jsonMatch ? 
jsonMatch[1] : responseText; + + const proposals = JSON.parse(jsonText.trim()); + + if (!Array.isArray(proposals)) { + throw new Error("Response is not an array"); + } + + // Validate and filter proposals + return proposals + .filter((p) => { + return ( + p.name && + p.intent && + typeof p.confidence === "number" && + p.confidence >= 0.6 && + Array.isArray(p.topics) && + p.topics.length > 0 + ); + }) + .map((p) => ({ + name: p.name.trim(), + intent: p.intent.trim(), + confidence: p.confidence, + reason: (p.reason || "").trim(), + topics: p.topics.map((t: any) => String(t)), + })); + } catch (error) { + logger.error("Failed to parse space proposals", { + error, + responseText: responseText.substring(0, 500), + }); + return []; + } +} diff --git a/apps/webapp/app/jobs/spaces/space-summary.logic.ts b/apps/webapp/app/jobs/spaces/space-summary.logic.ts new file mode 100644 index 0000000..0482fa5 --- /dev/null +++ b/apps/webapp/app/jobs/spaces/space-summary.logic.ts @@ -0,0 +1,721 @@ +import { logger } from "~/services/logger.service"; +import { SpaceService } from "~/services/space.server"; +import { makeModelCall } from "~/lib/model.server"; +import { runQuery } from "~/lib/neo4j.server"; +import { updateSpaceStatus, SPACE_STATUS } from "~/trigger/utils/space-status"; +import type { CoreMessage } from "ai"; +import { z } from "zod"; +import { getSpace, updateSpace } from "~/trigger/utils/space-utils"; +import { getSpaceEpisodeCount } from "~/services/graphModels/space"; + +export interface SpaceSummaryPayload { + userId: string; + spaceId: string; // Single space only + triggerSource?: "assignment" | "manual" | "scheduled"; +} + +interface SpaceEpisodeData { + uuid: string; + content: string; + originalContent: string; + source: string; + createdAt: Date; + validAt: Date; + metadata: any; + sessionId: string | null; +} + +interface SpaceSummaryData { + spaceId: string; + spaceName: string; + spaceDescription?: string; + contextCount: number; + summary: string; + keyEntities: string[]; + themes: string[]; + confidence: number; + lastUpdated: Date; + isIncremental: boolean; +} + +// Zod schema for LLM response validation +const SummaryResultSchema = z.object({ + summary: z.string(), + keyEntities: z.array(z.string()), + themes: z.array(z.string()), + confidence: z.number().min(0).max(1), +}); + +const CONFIG = { + maxEpisodesForSummary: 20, // Limit episodes for performance + minEpisodesForSummary: 1, // Minimum episodes to generate summary + summaryEpisodeThreshold: 5, // Minimum new episodes required to trigger summary (configurable) +}; + +export interface SpaceSummaryResult { + success: boolean; + spaceId: string; + triggerSource: string; + summary?: { + statementCount: number; + confidence: number; + themesCount: number; + } | null; + reason?: string; +} + +/** + * Core business logic for space summary generation + * This is shared between Trigger.dev and BullMQ implementations + */ +export async function processSpaceSummary( + payload: SpaceSummaryPayload, +): Promise { + const { userId, spaceId, triggerSource = "manual" } = payload; + + logger.info(`Starting space summary generation`, { + userId, + spaceId, + triggerSource, + }); + + try { + // Update status to processing + await updateSpaceStatus(spaceId, SPACE_STATUS.PROCESSING, { + userId, + operation: "space-summary", + metadata: { triggerSource, phase: "start_summary" }, + }); + + // Generate summary for the single space + const summaryResult = await generateSpaceSummary( + spaceId, + userId, + triggerSource, + ); + + if 
(summaryResult) { + // Store the summary + await storeSummary(summaryResult); + + // Update status to ready after successful completion + await updateSpaceStatus(spaceId, SPACE_STATUS.READY, { + userId, + operation: "space-summary", + metadata: { + triggerSource, + phase: "completed_summary", + contextCount: summaryResult.contextCount, + confidence: summaryResult.confidence, + }, + }); + + logger.info(`Generated summary for space ${spaceId}`, { + statementCount: summaryResult.contextCount, + confidence: summaryResult.confidence, + themes: summaryResult.themes.length, + triggerSource, + }); + + return { + success: true, + spaceId, + triggerSource, + summary: { + statementCount: summaryResult.contextCount, + confidence: summaryResult.confidence, + themesCount: summaryResult.themes.length, + }, + }; + } else { + // No summary generated - this could be due to insufficient episodes or no new episodes + // This is not an error state, so update status to ready + await updateSpaceStatus(spaceId, SPACE_STATUS.READY, { + userId, + operation: "space-summary", + metadata: { + triggerSource, + phase: "no_summary_needed", + reason: "Insufficient episodes or no new episodes to summarize", + }, + }); + + logger.info( + `No summary generated for space ${spaceId} - insufficient or no new episodes`, + ); + return { + success: true, + spaceId, + triggerSource, + summary: null, + reason: "No episodes to summarize", + }; + } + } catch (error) { + // Update status to error on exception + try { + await updateSpaceStatus(spaceId, SPACE_STATUS.ERROR, { + userId, + operation: "space-summary", + metadata: { + triggerSource, + phase: "exception", + error: error instanceof Error ? error.message : "Unknown error", + }, + }); + } catch (statusError) { + logger.warn(`Failed to update status to error for space ${spaceId}`, { + statusError, + }); + } + + logger.error( + `Error in space summary generation for space ${spaceId}:`, + error as Record, + ); + throw error; + } +} + +async function generateSpaceSummary( + spaceId: string, + userId: string, + triggerSource?: "assignment" | "manual" | "scheduled", +): Promise { + try { + // 1. Get space details + const spaceService = new SpaceService(); + const space = await spaceService.getSpace(spaceId, userId); + + if (!space) { + logger.warn(`Space ${spaceId} not found for user ${userId}`); + return null; + } + + // 2. Check episode count threshold (skip for manual triggers) + if (triggerSource !== "manual") { + const currentEpisodeCount = await getSpaceEpisodeCount(spaceId, userId); + const lastSummaryEpisodeCount = space.contextCount || 0; + const episodeDifference = currentEpisodeCount - lastSummaryEpisodeCount; + + if ( + episodeDifference < CONFIG.summaryEpisodeThreshold || + lastSummaryEpisodeCount !== 0 + ) { + logger.info( + `Skipping summary generation for space ${spaceId}: only ${episodeDifference} new episodes (threshold: ${CONFIG.summaryEpisodeThreshold})`, + { + currentEpisodeCount, + lastSummaryEpisodeCount, + episodeDifference, + threshold: CONFIG.summaryEpisodeThreshold, + }, + ); + return null; + } + + logger.info( + `Proceeding with summary generation for space ${spaceId}: ${episodeDifference} new episodes (threshold: ${CONFIG.summaryEpisodeThreshold})`, + { + currentEpisodeCount, + lastSummaryEpisodeCount, + episodeDifference, + }, + ); + } + + // 2. Check for existing summary + const existingSummary = await getExistingSummary(spaceId); + const isIncremental = existingSummary !== null; + + // 3. 
Get episodes (all or new ones based on existing summary) + const episodes = await getSpaceEpisodes( + spaceId, + userId, + isIncremental ? existingSummary?.lastUpdated : undefined, + ); + + // Handle case where no new episodes exist for incremental update + if (isIncremental && episodes.length === 0) { + logger.info( + `No new episodes found for space ${spaceId}, skipping summary update`, + ); + return null; + } + + // Check minimum episode requirement for new summaries only + if (!isIncremental && episodes.length < CONFIG.minEpisodesForSummary) { + logger.info( + `Space ${spaceId} has insufficient episodes (${episodes.length}) for new summary`, + ); + return null; + } + + // 4. Process episodes using unified approach + let summaryResult; + + if (episodes.length > CONFIG.maxEpisodesForSummary) { + logger.info( + `Large space detected (${episodes.length} episodes). Processing in batches.`, + ); + + // Process in batches, each building on previous result + const batches: SpaceEpisodeData[][] = []; + for (let i = 0; i < episodes.length; i += CONFIG.maxEpisodesForSummary) { + batches.push(episodes.slice(i, i + CONFIG.maxEpisodesForSummary)); + } + + let currentSummary = existingSummary?.summary || null; + let currentThemes = existingSummary?.themes || []; + let cumulativeConfidence = 0; + + for (const [batchIndex, batch] of batches.entries()) { + logger.info( + `Processing batch ${batchIndex + 1}/${batches.length} with ${batch.length} episodes`, + ); + + const batchResult = await generateUnifiedSummary( + space.name, + space.description as string, + batch, + currentSummary, + currentThemes, + ); + + if (batchResult) { + currentSummary = batchResult.summary; + currentThemes = batchResult.themes; + cumulativeConfidence += batchResult.confidence; + } else { + logger.warn(`Failed to process batch ${batchIndex + 1}`); + } + + // Small delay between batches + if (batchIndex < batches.length - 1) { + await new Promise((resolve) => setTimeout(resolve, 500)); + } + } + + summaryResult = currentSummary + ? 
{ + summary: currentSummary, + themes: currentThemes, + confidence: Math.min(cumulativeConfidence / batches.length, 1.0), + } + : null; + } else { + logger.info( + `Processing ${episodes.length} episodes with unified approach`, + ); + + // Use unified approach for smaller spaces + summaryResult = await generateUnifiedSummary( + space.name, + space.description as string, + episodes, + existingSummary?.summary || null, + existingSummary?.themes || [], + ); + } + + if (!summaryResult) { + logger.warn(`Failed to generate LLM summary for space ${spaceId}`); + return null; + } + + // Get the actual current counts from Neo4j + const currentEpisodeCount = await getSpaceEpisodeCount(spaceId, userId); + + return { + spaceId: space.uuid, + spaceName: space.name, + spaceDescription: space.description as string, + contextCount: currentEpisodeCount, + summary: summaryResult.summary, + keyEntities: summaryResult.keyEntities || [], + themes: summaryResult.themes, + confidence: summaryResult.confidence, + lastUpdated: new Date(), + isIncremental, + }; + } catch (error) { + logger.error( + `Error generating summary for space ${spaceId}:`, + error as Record, + ); + return null; + } +} + +async function generateUnifiedSummary( + spaceName: string, + spaceDescription: string | undefined, + episodes: SpaceEpisodeData[], + previousSummary: string | null = null, + previousThemes: string[] = [], +): Promise<{ + summary: string; + themes: string[]; + confidence: number; + keyEntities?: string[]; +} | null> { + try { + const prompt = createUnifiedSummaryPrompt( + spaceName, + spaceDescription, + episodes, + previousSummary, + previousThemes, + ); + + // Space summary generation requires HIGH complexity (creative synthesis, narrative generation) + let responseText = ""; + await makeModelCall( + false, + prompt, + (text: string) => { + responseText = text; + }, + undefined, + "high", + ); + + return parseSummaryResponse(responseText); + } catch (error) { + logger.error( + "Error generating unified summary:", + error as Record, + ); + return null; + } +} + +function createUnifiedSummaryPrompt( + spaceName: string, + spaceDescription: string | undefined, + episodes: SpaceEpisodeData[], + previousSummary: string | null, + previousThemes: string[], +): CoreMessage[] { + // If there are no episodes and no previous summary, we cannot generate a meaningful summary + if (episodes.length === 0 && previousSummary === null) { + throw new Error( + "Cannot generate summary without episodes or existing summary", + ); + } + + const episodesText = episodes + .map( + (episode) => + `- ${episode.content} (Source: ${episode.source}, Session: ${episode.sessionId || "N/A"})`, + ) + .join("\n"); + + // Extract key entities and themes from episode content + const contentWords = episodes + .map((ep) => ep.content.toLowerCase()) + .join(" ") + .split(/\s+/) + .filter((word) => word.length > 3); + + const wordFrequency = new Map(); + contentWords.forEach((word) => { + wordFrequency.set(word, (wordFrequency.get(word) || 0) + 1); + }); + + const topEntities = Array.from(wordFrequency.entries()) + .sort(([, a], [, b]) => b - a) + .slice(0, 10) + .map(([word]) => word); + + const isUpdate = previousSummary !== null; + + return [ + { + role: "system", + content: `You are an expert at analyzing and summarizing episodes within semantic spaces based on the space's intent and purpose. Your task is to ${isUpdate ? "update an existing summary by integrating new episodes" : "create a comprehensive summary of episodes"}. + +CRITICAL RULES: +1. 
Base your summary ONLY on insights derived from the actual content/episodes provided +2. Use the space's INTENT/PURPOSE (from description) to guide what to summarize and how to organize it +3. Write in a factual, neutral tone - avoid promotional language ("pivotal", "invaluable", "cutting-edge") +4. Be specific and concrete - reference actual content, patterns, and insights found in the episodes +5. If episodes are insufficient for meaningful insights, state that more data is needed + +INTENT-DRIVEN SUMMARIZATION: +Your summary should SERVE the space's intended purpose. Examples: +- "Learning React" β†’ Summarize React concepts, patterns, techniques learned +- "Project X Updates" β†’ Summarize progress, decisions, blockers, next steps +- "Health Tracking" β†’ Summarize metrics, trends, observations, insights +- "Guidelines for React" β†’ Extract actionable patterns, best practices, rules +- "Evolution of design thinking" β†’ Track how thinking changed over time, decision points +The intent defines WHY this space exists - organize content to serve that purpose. + +INSTRUCTIONS: +${ + isUpdate + ? `1. Review the existing summary and themes carefully +2. Analyze the new episodes for patterns and insights that align with the space's intent +3. Identify connecting points between existing knowledge and new episodes +4. Update the summary to seamlessly integrate new information while preserving valuable existing insights +5. Evolve themes by adding new ones or refining existing ones based on the space's purpose +6. Organize the summary to serve the space's intended use case` + : `1. Analyze the semantic content and relationships within the episodes +2. Identify topics/sections that align with the space's INTENT and PURPOSE +3. Create a coherent summary that serves the space's intended use case +4. Organize the summary based on the space's purpose (not generic frequency-based themes)` +} +${isUpdate ? "7" : "5"}. Assess your confidence in the ${isUpdate ? "updated" : ""} summary quality (0.0-1.0) + +INTENT-ALIGNED ORGANIZATION: +- Organize sections based on what serves the space's purpose +- Topics don't need minimum episode counts - relevance to intent matters most +- Each section should provide value aligned with the space's intended use +- For "guidelines" spaces: focus on actionable patterns +- For "tracking" spaces: focus on temporal patterns and changes +- For "learning" spaces: focus on concepts and insights gained +- Let the space's intent drive the structure, not rigid rules + +${ + isUpdate + ? `CONNECTION FOCUS: +- Entity relationships that span across batches/time +- Theme evolution and expansion +- Temporal patterns and progressions +- Contradictions or confirmations of existing insights +- New insights that complement existing knowledge` + : "" +} + +RESPONSE FORMAT: +Provide your response inside tags with valid JSON. Include both HTML summary and markdown format. + + +{ + "summary": "${isUpdate ? "Updated HTML summary that integrates new insights with existing knowledge. Write factually about what the statements reveal - mention specific entities, relationships, and patterns found in the data. Avoid marketing language. Use HTML tags for structure." : "Factual HTML summary based on patterns found in the statements. Report what the data actually shows - specific entities, relationships, frequencies, and concrete insights. Avoid promotional language. Use HTML tags like

<h3>, <p>, <ul>, <li> for structure. Keep it concise and evidence-based."}",
+  "keyEntities": ["entity1", "entity2", "entity3"],
+  "themes": ["${isUpdate ? 'updated_theme1", "new_theme2", "evolved_theme3' : 'theme1", "theme2", "theme3'}"],
+  "confidence": 0.85
+}
+</output>
+
+JSON FORMATTING RULES:
+- HTML content in summary field is allowed and encouraged
+- Escape quotes within strings as \"
+- Escape HTML angle brackets if needed: &lt; and &gt;
+- Use proper HTML tags for structure:

<h3>, <h4>, <p>, <ul>, <li>, <strong>

      , etc. +- HTML content should be well-formed and semantic + +GUIDELINES: +${ + isUpdate + ? `- Preserve valuable insights from existing summary +- Integrate new information by highlighting connections +- Themes should evolve naturally, don't replace wholesale +- The updated summary should read as a coherent whole +- Make the summary user-friendly and explain what value this space provides` + : `- Report only what the episodes actually reveal - be specific and concrete +- Cite actual content and patterns found in the episodes +- Avoid generic descriptions that could apply to any space +- Use neutral, factual language - no "comprehensive", "robust", "cutting-edge" etc. +- Themes must be backed by at least 3 supporting episodes with clear evidence +- Better to have fewer, well-supported themes than many weak ones +- Confidence should reflect actual data quality and coverage, not aspirational goals` +}`, + }, + { + role: "user", + content: `SPACE INFORMATION: +Name: "${spaceName}" +Intent/Purpose: ${spaceDescription || "No specific intent provided - organize naturally based on content"} + +${ + isUpdate + ? `EXISTING SUMMARY: +${previousSummary} + +EXISTING THEMES: +${previousThemes.join(", ")} + +NEW EPISODES TO INTEGRATE (${episodes.length} episodes):` + : `EPISODES IN THIS SPACE (${episodes.length} episodes):` +} +${episodesText} + +${ + episodes.length > 0 + ? `TOP WORDS BY FREQUENCY: +${topEntities.join(", ")}` + : "" +} + +${ + isUpdate + ? "Please identify connections between the existing summary and new episodes, then update the summary to integrate the new insights coherently. Organize the summary to SERVE the space's intent/purpose. Remember: only summarize insights from the actual episode content." + : "Please analyze the episodes and provide a comprehensive summary that SERVES the space's intent/purpose. Organize sections based on what would be most valuable for this space's intended use case. If the intent is unclear, organize naturally based on content patterns. Only summarize insights from actual episode content." 
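To make the incremental flow above concrete, here is a condensed restatement of the batch fold in `generateSpaceSummary`: each batch's output becomes the `previousSummary`/`previousThemes` input of the next call, so large spaces are summarized cumulatively. `chunk` is a sketch-local helper; `space`, `CONFIG`, and `generateUnifiedSummary` refer to the names defined in this file.

```
function chunk<T>(items: T[], size: number): T[][] {
  const out: T[][] = [];
  for (let i = 0; i < items.length; i += size) {
    out.push(items.slice(i, i + size));
  }
  return out;
}

async function foldBatchSummaries(
  episodes: SpaceEpisodeData[],
  existing: { summary: string; themes: string[] } | null,
) {
  let summary: string | null = existing?.summary ?? null;
  let themes: string[] = existing?.themes ?? [];
  let confidenceSum = 0;

  const batches = chunk(episodes, CONFIG.maxEpisodesForSummary); // 20 per batch
  for (const batch of batches) {
    // Each call builds on the previous batch's result.
    const result = await generateUnifiedSummary(
      space.name,
      space.description as string,
      batch,
      summary,
      themes,
    );
    if (result) {
      summary = result.summary;
      themes = result.themes;
      confidenceSum += result.confidence;
    }
  }

  // Average per-batch confidence, capped at 1.0, as in the code above.
  return summary
    ? { summary, themes, confidence: Math.min(confidenceSum / batches.length, 1.0) }
    : null;
}
```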
+}`, + }, + ]; +} + +async function getExistingSummary(spaceId: string): Promise<{ + summary: string; + themes: string[]; + lastUpdated: Date; + contextCount: number; +} | null> { + try { + const existingSummary = await getSpace(spaceId); + + if (existingSummary?.summary) { + return { + summary: existingSummary.summary, + themes: existingSummary.themes, + lastUpdated: existingSummary.summaryGeneratedAt || new Date(), + contextCount: existingSummary.contextCount || 0, + }; + } + + return null; + } catch (error) { + logger.warn(`Failed to get existing summary for space ${spaceId}:`, { + error, + }); + return null; + } +} + +async function getSpaceEpisodes( + spaceId: string, + userId: string, + sinceDate?: Date, +): Promise { + // Query episodes directly using Space-[:HAS_EPISODE]->Episode relationships + const params: any = { spaceId, userId }; + + let dateCondition = ""; + if (sinceDate) { + dateCondition = "AND e.createdAt > $sinceDate"; + params.sinceDate = sinceDate.toISOString(); + } + + const query = ` + MATCH (space:Space {uuid: $spaceId, userId: $userId})-[:HAS_EPISODE]->(e:Episode {userId: $userId}) + WHERE e IS NOT NULL ${dateCondition} + RETURN DISTINCT e + ORDER BY e.createdAt DESC + `; + + const result = await runQuery(query, params); + + return result.map((record) => { + const episode = record.get("e").properties; + return { + uuid: episode.uuid, + content: episode.content, + originalContent: episode.originalContent, + source: episode.source, + createdAt: new Date(episode.createdAt), + validAt: new Date(episode.validAt), + metadata: JSON.parse(episode.metadata || "{}"), + sessionId: episode.sessionId, + }; + }); +} + +function parseSummaryResponse(response: string): { + summary: string; + themes: string[]; + confidence: number; + keyEntities?: string[]; +} | null { + try { + // Extract content from tags + const outputMatch = response.match(/([\s\S]*?)<\/output>/); + if (!outputMatch) { + logger.warn("No tags found in LLM summary response"); + logger.debug("Full LLM response:", { response }); + return null; + } + + let jsonContent = outputMatch[1].trim(); + + let parsed; + try { + parsed = JSON.parse(jsonContent); + } catch (jsonError) { + logger.warn("JSON parsing failed, attempting cleanup and retry", { + originalError: jsonError, + jsonContent: jsonContent.substring(0, 500) + "...", // Log first 500 chars + }); + + // More aggressive cleanup for malformed JSON + jsonContent = jsonContent + .replace(/([^\\])"/g, '$1\\"') // Escape unescaped quotes + .replace(/^"/g, '\\"') // Escape quotes at start + .replace(/\\\\"/g, '\\"'); // Fix double-escaped quotes + + parsed = JSON.parse(jsonContent); + } + + // Validate the response structure + const validationResult = SummaryResultSchema.safeParse(parsed); + if (!validationResult.success) { + logger.warn("Invalid LLM summary response format:", { + error: validationResult.error, + parsedData: parsed, + }); + return null; + } + + return validationResult.data; + } catch (error) { + logger.error( + "Error parsing LLM summary response:", + error as Record, + ); + logger.debug("Failed response content:", { response }); + return null; + } +} + +async function storeSummary(summaryData: SpaceSummaryData): Promise { + try { + // Store in PostgreSQL for API access and persistence + await updateSpace(summaryData); + + // Also store in Neo4j for graph-based queries + const query = ` + MATCH (space:Space {uuid: $spaceId}) + SET space.summary = $summary, + space.keyEntities = $keyEntities, + space.themes = $themes, + space.summaryConfidence = 
$confidence, + space.summaryContextCount = $contextCount, + space.summaryLastUpdated = datetime($lastUpdated) + RETURN space + `; + + await runQuery(query, { + spaceId: summaryData.spaceId, + summary: summaryData.summary, + keyEntities: summaryData.keyEntities, + themes: summaryData.themes, + confidence: summaryData.confidence, + contextCount: summaryData.contextCount, + lastUpdated: summaryData.lastUpdated.toISOString(), + }); + + logger.info(`Stored summary for space ${summaryData.spaceId}`, { + themes: summaryData.themes.length, + keyEntities: summaryData.keyEntities.length, + confidence: summaryData.confidence, + }); + } catch (error) { + logger.error( + `Error storing summary for space ${summaryData.spaceId}:`, + error as Record, + ); + throw error; + } +} diff --git a/apps/webapp/app/lib/queue-adapter.server.ts b/apps/webapp/app/lib/queue-adapter.server.ts index 38bac92..4a867dc 100644 --- a/apps/webapp/app/lib/queue-adapter.server.ts +++ b/apps/webapp/app/lib/queue-adapter.server.ts @@ -15,7 +15,8 @@ import type { z } from "zod"; import type { IngestBodyRequest } from "~/jobs/ingest/ingest-episode.logic"; import type { CreateConversationTitlePayload } from "~/jobs/conversation/create-title.logic"; import type { SessionCompactionPayload } from "~/jobs/session/session-compaction.logic"; -import { type SpaceAssignmentPayload } from "~/trigger/spaces/space-assignment"; +import type { SpaceAssignmentPayload } from "~/jobs/spaces/space-assignment.logic"; +import type { SpaceSummaryPayload } from "~/jobs/spaces/space-summary.logic"; type QueueProvider = "trigger" | "bullmq"; @@ -144,22 +145,53 @@ export async function enqueueSessionCompaction( /** * Enqueue space assignment job - * (Helper for common job logic to call) */ export async function enqueueSpaceAssignment( payload: SpaceAssignmentPayload, -): Promise { +): Promise<{ id?: string }> { const provider = env.QUEUE_PROVIDER as QueueProvider; if (provider === "trigger") { const { triggerSpaceAssignment } = await import( "~/trigger/spaces/space-assignment" ); - await triggerSpaceAssignment(payload); + const handler = await triggerSpaceAssignment(payload); + return { id: handler.id }; } else { - // For BullMQ, space assignment is not implemented yet - // You can add it later when needed - console.warn("Space assignment not implemented for BullMQ yet"); + // BullMQ + const { spaceAssignmentQueue } = await import("~/bullmq/queues"); + const job = await spaceAssignmentQueue.add("space-assignment", payload, { + jobId: `space-assignment-${payload.userId}-${payload.mode}-${Date.now()}`, + attempts: 3, + backoff: { type: "exponential", delay: 2000 }, + }); + return { id: job.id }; + } +} + +/** + * Enqueue space summary job + */ +export async function enqueueSpaceSummary( + payload: SpaceSummaryPayload, +): Promise<{ id?: string }> { + const provider = env.QUEUE_PROVIDER as QueueProvider; + + if (provider === "trigger") { + const { triggerSpaceSummary } = await import( + "~/trigger/spaces/space-summary" + ); + const handler = await triggerSpaceSummary(payload); + return { id: handler.id }; + } else { + // BullMQ + const { spaceSummaryQueue } = await import("~/bullmq/queues"); + const job = await spaceSummaryQueue.add("space-summary", payload, { + jobId: `space-summary-${payload.spaceId}-${Date.now()}`, + attempts: 3, + backoff: { type: "exponential", delay: 2000 }, + }); + return { id: job.id }; } } @@ -168,6 +200,7 @@ export async function enqueueSpaceAssignment( */ export async function enqueueBertTopicAnalysis(payload: { userId: string; + 
workspaceId: string; minTopicSize?: number; nrTopics?: number; }): Promise<{ id?: string }> { @@ -190,7 +223,6 @@ export async function enqueueBertTopicAnalysis(payload: { jobId: `bert-${payload.userId}-${Date.now()}`, attempts: 2, // Only 2 attempts for expensive operations backoff: { type: "exponential", delay: 5000 }, - timeout: 300000, // 5 minute timeout }); return { id: job.id }; } diff --git a/apps/webapp/app/routes/api.v1.conversation._index.tsx b/apps/webapp/app/routes/api.v1.conversation._index.tsx index 8e92a4c..922f02c 100644 --- a/apps/webapp/app/routes/api.v1.conversation._index.tsx +++ b/apps/webapp/app/routes/api.v1.conversation._index.tsx @@ -19,7 +19,10 @@ import { import { getModel } from "~/lib/model.server"; import { UserTypeEnum } from "@core/types"; import { nanoid } from "nanoid"; -import { getOrCreatePersonalAccessToken } from "~/services/personalAccessToken.server"; +import { + deletePersonalAccessToken, + getOrCreatePersonalAccessToken, +} from "~/services/personalAccessToken.server"; import { hasAnswer, hasQuestion, @@ -126,6 +129,7 @@ const { loader, action } = createHybridActionApiRoute( }); result.consumeStream(); // no await + await deletePersonalAccessToken(pat?.id); return result.toUIMessageStreamResponse({ originalMessages: validatedMessages, diff --git a/apps/webapp/app/routes/api.v1.logs.tsx b/apps/webapp/app/routes/api.v1.logs.tsx index 0a06c57..78950e7 100644 --- a/apps/webapp/app/routes/api.v1.logs.tsx +++ b/apps/webapp/app/routes/api.v1.logs.tsx @@ -1,6 +1,7 @@ import { json } from "@remix-run/node"; import { z } from "zod"; import { prisma } from "~/db.server"; + import { createHybridLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; // Schema for logs search parameters diff --git a/apps/webapp/app/services/clustering.server.ts b/apps/webapp/app/services/clustering.server.ts deleted file mode 100644 index e69de29..0000000 diff --git a/apps/webapp/app/services/graphModels/space.ts b/apps/webapp/app/services/graphModels/space.ts index be0fbfb..054af4e 100644 --- a/apps/webapp/app/services/graphModels/space.ts +++ b/apps/webapp/app/services/graphModels/space.ts @@ -45,6 +45,43 @@ export async function createSpace( }; } +/** + * Get all active spaces for a user + */ +export async function getAllSpacesForUser( + userId: string, +): Promise { + const query = ` + MATCH (s:Space {userId: $userId}) + WHERE s.isActive = true + + // Count episodes assigned to each space + OPTIONAL MATCH (s)-[:HAS_EPISODE]->(e:Episode {userId: $userId}) + + WITH s, count(e) as episodeCount + RETURN s, episodeCount + ORDER BY s.createdAt DESC + `; + + const result = await runQuery(query, { userId }); + + return result.map((record) => { + const spaceData = record.get("s").properties; + const episodeCount = record.get("episodeCount") || 0; + + return { + uuid: spaceData.uuid, + name: spaceData.name, + description: spaceData.description, + userId: spaceData.userId, + createdAt: new Date(spaceData.createdAt), + updatedAt: new Date(spaceData.updatedAt), + isActive: spaceData.isActive, + contextCount: Number(episodeCount), + }; + }); +} + /** * Get a specific space by ID */ diff --git a/apps/webapp/app/services/mcp.server.ts b/apps/webapp/app/services/mcp.server.ts index 3800f60..f44fb54 100644 --- a/apps/webapp/app/services/mcp.server.ts +++ b/apps/webapp/app/services/mcp.server.ts @@ -58,6 +58,7 @@ async function createMcpServer( // Handle memory tools and integration meta-tools if ( name.startsWith("memory_") || + name === "get_session_id" || name === 
"get_integrations" || name === "get_integration_actions" || name === "execute_integration_action" diff --git a/apps/webapp/app/services/space.server.ts b/apps/webapp/app/services/space.server.ts index dcf213b..058046b 100644 --- a/apps/webapp/app/services/space.server.ts +++ b/apps/webapp/app/services/space.server.ts @@ -23,7 +23,10 @@ export class SpaceService { /** * Create a new space for a user */ - async createSpace(params: CreateSpaceParams): Promise { + async createSpace( + params: CreateSpaceParams, + options?: { skipAutoAssignment?: boolean }, + ): Promise { logger.info(`Creating space "${params.name}" for user ${params.userId}`); // Validate input @@ -67,23 +70,25 @@ export class SpaceService { // Track space creation trackFeatureUsage("space_created", params.userId).catch(console.error); - // Trigger automatic LLM assignment for the new space - try { - await enqueueSpaceAssignment({ - userId: params.userId, - workspaceId: params.workspaceId, - mode: "new_space", - newSpaceId: space.id, - batchSize: 25, // Analyze recent statements for the new space - }); + // Trigger automatic LLM assignment for the new space (unless skipped) + if (!options?.skipAutoAssignment) { + try { + await enqueueSpaceAssignment({ + userId: params.userId, + workspaceId: params.workspaceId, + mode: "new_space", + newSpaceId: space.id, + batchSize: 25, // Analyze recent statements for the new space + }); - logger.info(`Triggered LLM space assignment for new space ${space.id}`); - } catch (error) { - // Don't fail space creation if LLM assignment fails - logger.warn( - `Failed to trigger LLM assignment for space ${space.id}:`, - error as Record, - ); + logger.info(`Triggered LLM space assignment for new space ${space.id}`); + } catch (error) { + // Don't fail space creation if LLM assignment fails + logger.warn( + `Failed to trigger LLM assignment for space ${space.id}:`, + error as Record, + ); + } } return space; diff --git a/apps/webapp/app/trigger/bert/topic-analysis.ts b/apps/webapp/app/trigger/bert/topic-analysis.ts index 7473e32..21111b2 100644 --- a/apps/webapp/app/trigger/bert/topic-analysis.ts +++ b/apps/webapp/app/trigger/bert/topic-analysis.ts @@ -1,8 +1,30 @@ import { task } from "@trigger.dev/sdk/v3"; +import { python } from "@trigger.dev/python"; import { processTopicAnalysis, type TopicAnalysisPayload, } from "~/jobs/bert/topic-analysis.logic"; +import { spaceSummaryTask } from "~/trigger/spaces/space-summary"; + +/** + * Python runner for Trigger.dev using python.runScript + */ +async function runBertWithTriggerPython( + userId: string, + minTopicSize: number, + nrTopics?: number, +): Promise { + const args = [userId, "--json", "--min-topic-size", String(minTopicSize)]; + + if (nrTopics) { + args.push("--nr-topics", String(nrTopics)); + } + + console.log(`[BERT Topic Analysis] Running with Trigger.dev Python: args=${args.join(" ")}`); + + const result = await python.runScript("./apps/webapp/app/bert/main.py", args); + return result.stdout; +} /** * Trigger.dev task for BERT topic analysis @@ -16,6 +38,14 @@ export const bertTopicAnalysisTask = task({ concurrencyLimit: 3, // Max 3 parallel analyses to avoid CPU overload }, run: async (payload: TopicAnalysisPayload) => { - return await processTopicAnalysis(payload); + return await processTopicAnalysis( + payload, + // Callback to enqueue space summary + async (params) => { + await spaceSummaryTask.trigger(params); + }, + // Python runner for Trigger.dev + runBertWithTriggerPython, + ); }, }); diff --git 
a/apps/webapp/app/trigger/ingest/retry-no-credits.ts b/apps/webapp/app/trigger/ingest/retry-no-credits.ts index dcc0556..7eab24e 100644 --- a/apps/webapp/app/trigger/ingest/retry-no-credits.ts +++ b/apps/webapp/app/trigger/ingest/retry-no-credits.ts @@ -1,9 +1,9 @@ import { task } from "@trigger.dev/sdk"; import { z } from "zod"; -import { IngestionQueue, IngestionStatus } from "@core/database"; +import { IngestionStatus } from "@core/database"; import { logger } from "~/services/logger.service"; import { prisma } from "../utils/prisma"; -import { IngestBodyRequest, ingestTask } from "./ingest"; +import { type IngestBodyRequest, ingestTask } from "./ingest"; export const RetryNoCreditBodyRequest = z.object({ workspaceId: z.string(), @@ -43,9 +43,7 @@ export const retryNoCreditsTask = task({ }; } - logger.log( - `Found ${noCreditItems.length} NO_CREDITS episodes to retry`, - ); + logger.log(`Found ${noCreditItems.length} NO_CREDITS episodes to retry`); const results = { total: noCreditItems.length, diff --git a/apps/webapp/app/trigger/spaces/space-assignment.ts b/apps/webapp/app/trigger/spaces/space-assignment.ts index 23359d7..bfc815f 100644 --- a/apps/webapp/app/trigger/spaces/space-assignment.ts +++ b/apps/webapp/app/trigger/spaces/space-assignment.ts @@ -1,79 +1,12 @@ import { queue, task } from "@trigger.dev/sdk/v3"; import { logger } from "~/services/logger.service"; -import { SpaceService } from "~/services/space.server"; -import { makeModelCall } from "~/lib/model.server"; -import { createBatch, getBatch } from "~/lib/batch.server"; -import { runQuery } from "~/lib/neo4j.server"; import { - assignEpisodesToSpace, - getSpaceEpisodeCount, -} from "~/services/graphModels/space"; + processSpaceAssignment, + type SpaceAssignmentPayload, +} from "~/jobs/spaces/space-assignment.logic"; import { triggerSpaceSummary } from "./space-summary"; -import { triggerSpacePattern } from "./space-pattern"; -import { - updateMultipleSpaceStatuses, - SPACE_STATUS, -} from "../utils/space-status"; -import type { CoreMessage } from "ai"; -import { z } from "zod"; -import { type Space } from "@prisma/client"; -export interface SpaceAssignmentPayload { - userId: string; - workspaceId: string; - mode: "new_space" | "episode"; - newSpaceId?: string; // For new_space mode - episodeIds?: string[]; // For daily_batch mode (default: 1) - batchSize?: number; // Processing batch size -} - -interface EpisodeData { - uuid: string; - content: string; - originalContent: string; - source: string; - createdAt: Date; - metadata: any; -} - -interface SpaceData { - uuid: string; - name: string; - description?: string; - episodeCount: number; -} - -interface AssignmentResult { - episodeId: string; - spaceIds: string[]; - confidence: number; - reasoning?: string; -} - -const CONFIG = { - newSpaceMode: { - batchSize: 20, - confidenceThreshold: 0.75, // Intent-based threshold for new space creation - useBatchAPI: true, // Use batch API for new space mode - minEpisodesForBatch: 5, // Minimum episodes to use batch API - }, - episodeMode: { - batchSize: 20, - confidenceThreshold: 0.75, // Intent-based threshold for episode assignment - useBatchAPI: true, // Use batch API for episode mode - minEpisodesForBatch: 5, // Minimum episodes to use batch API - }, -}; - -// Zod schema for LLM response validation -const AssignmentResultSchema = z.array( - z.object({ - episodeId: z.string(), - addSpaceId: z.array(z.string()), - confidence: z.number(), - reasoning: z.string(), - }), -); +export type { SpaceAssignmentPayload }; const 
spaceAssignmentQueue = queue({ name: "space-assignment-queue", @@ -85,1110 +18,22 @@ export const spaceAssignmentTask = task({ queue: spaceAssignmentQueue, maxDuration: 1800, // 15 minutes timeout run: async (payload: SpaceAssignmentPayload) => { - const { - userId, - workspaceId, - mode, - newSpaceId, - episodeIds, - batchSize = mode === "new_space" - ? CONFIG.newSpaceMode.batchSize - : CONFIG.episodeMode.batchSize, - } = payload; - - logger.info(`Starting space assignment`, { - userId, - mode, - newSpaceId, - episodeIds, - batchSize, + logger.info(`[Trigger.dev] Starting space assignment task`, { + userId: payload.userId, + mode: payload.mode, }); - const spaceService = new SpaceService(); - - try { - // 1. Get user's spaces - const spaces = await spaceService.getUserSpaces(userId); - - if (spaces.length === 0) { - logger.info(`No spaces found for user ${userId}, skipping assignment`); - return { - success: true, - message: "No spaces to assign to", - processed: 0, - }; - } - - // 2. Get episodes to analyze based on mode - const episodes = await getEpisodesToAnalyze(userId, mode, { - newSpaceId, - episodeIds, - }); - - if (episodes.length === 0) { - logger.info( - `No episodes to analyze for user ${userId} in ${mode} mode`, - ); - return { - success: true, - message: "No episodes to analyze", - processed: 0, - }; - } - - // 3. Process episodes using batch AI or fallback to sequential - const config = - mode === "new_space" ? CONFIG.newSpaceMode : CONFIG.episodeMode; - // const shouldUseBatchAPI = - // config.useBatchAPI && episodes.length >= config.minEpisodesForBatch; - const shouldUseBatchAPI = true; - - let totalProcessed = 0; - let totalAssignments = 0; - let totalBatches = 0; - const affectedSpaces = new Set(); // Track spaces that received new episodes - - if (shouldUseBatchAPI) { - logger.info( - `Using Batch AI processing for ${episodes.length} episodes`, - { - mode, - userId, - batchSize, - }, - ); - - const batchResult = await processBatchAI( - episodes, - spaces, - userId, - mode, - newSpaceId, - batchSize, - ); - totalProcessed = batchResult.processed; - totalAssignments = batchResult.assignments; - batchResult.affectedSpaces?.forEach((spaceId) => - affectedSpaces.add(spaceId), - ); - } else { - logger.info( - `Using sequential processing for ${episodes.length} episodes (below batch threshold)`, - { - mode, - userId, - minRequired: config.minEpisodesForBatch, - }, - ); - - // Fallback to sequential processing for smaller episode sets - totalBatches = Math.ceil(episodes.length / batchSize); - - for (let i = 0; i < totalBatches; i++) { - const batch = episodes.slice(i * batchSize, (i + 1) * batchSize); - - logger.info( - `Processing batch ${i + 1}/${totalBatches} with ${batch.length} episodes`, - { - mode, - userId, - }, - ); - - const batchResult = await processBatch( - batch, - spaces, - userId, - mode, - newSpaceId, - ); - totalProcessed += batchResult.processed; - totalAssignments += batchResult.assignments; - batchResult.affectedSpaces?.forEach((spaceId) => - affectedSpaces.add(spaceId), - ); - - // Add delay between batches to avoid rate limiting - if (i < totalBatches - 1) { - await new Promise((resolve) => setTimeout(resolve, 1000)); - } - } - } - - logger.info(`Completed LLM space assignment`, { - userId, - mode, - totalProcessed, - totalAssignments, - spacesAvailable: spaces.length, - affectedSpaces: affectedSpaces.size, - }); - - // 4. 
Update space status to "processing" for affected spaces - if (affectedSpaces.size > 0) { - try { - await updateMultipleSpaceStatuses( - Array.from(affectedSpaces), - SPACE_STATUS.PROCESSING, - { - userId, - operation: "space-assignment", - metadata: { mode, phase: "start_processing" }, - }, - ); - } catch (statusError) { - logger.warn(`Failed to update space statuses to processing:`, { - error: statusError, - userId, - mode, - }); - } - } - - // 5. Trigger space summaries for affected spaces (fan-out pattern) - if (affectedSpaces.size > 0) { - try { - logger.info( - `Triggering space summaries for ${affectedSpaces.size} affected spaces in parallel`, - ); - - // Fan out to multiple parallel triggers - const summaryPromises = Array.from(affectedSpaces).map((spaceId) => - triggerSpaceSummary({ - userId, - workspaceId, - spaceId, - triggerSource: "assignment", - }).catch((error) => { - logger.warn(`Failed to trigger summary for space ${spaceId}:`, { - error, - }); - return { success: false, spaceId, error: error.message }; - }), - ); - - const summaryResults = await Promise.allSettled(summaryPromises); - const successful = summaryResults.filter( - (r) => r.status === "fulfilled", - ).length; - const failed = summaryResults.filter( - (r) => r.status === "rejected", - ).length; - - logger.info(`Space summary triggers completed`, { - userId, - mode, - totalSpaces: affectedSpaces.size, - successful, - failed, - }); - } catch (summaryError) { - // Don't fail the assignment if summary generation fails - logger.warn(`Failed to trigger space summaries after assignment:`, { - error: summaryError, - userId, - mode, - affectedSpaces: Array.from(affectedSpaces), - }); - } - } - - // 6. Update space status to "ready" after all processing is complete - if (affectedSpaces.size > 0) { - try { - await updateMultipleSpaceStatuses( - Array.from(affectedSpaces), - SPACE_STATUS.READY, - { - userId, - operation: "space-assignment", - metadata: { mode, phase: "completed_processing" }, - }, - ); - } catch (finalStatusError) { - logger.warn(`Failed to update space statuses to ready:`, { - error: finalStatusError, - userId, - mode, - }); - } - } - - return { - success: true, - mode, - processed: totalProcessed, - assignments: totalAssignments, - batches: totalBatches, - spacesAvailable: spaces.length, - affectedSpaces: affectedSpaces.size, - summaryTriggered: affectedSpaces.size > 0, - patternCheckTriggered: affectedSpaces.size > 0, - }; - } catch (error) { - logger.error( - `Error in LLM space assignment for user ${userId}:`, - error as Record, - ); - throw error; - } + // Use common business logic with callback for triggering space summaries + return await processSpaceAssignment( + payload, + // Callback to enqueue space summary + async (summaryPayload) => { + return await triggerSpaceSummary(summaryPayload); + }, + ); }, }); -async function getEpisodesToAnalyze( - userId: string, - mode: "new_space" | "episode", - options: { newSpaceId?: string; episodeIds?: string[] }, -): Promise { - let query: string; - let params: any = { userId }; - - if (mode === "new_space") { - // For new space: analyze all recent episodes - query = ` - MATCH (e:Episode {userId: $userId}) - RETURN e - ORDER BY e.createdAt DESC - LIMIT 1000 - `; - } else { - // For episode mode: analyze specific episodes - query = ` - UNWIND $episodeIds AS episodeId - MATCH (e:Episode {uuid: episodeId, userId: $userId}) - RETURN e - ORDER BY e.createdAt DESC - `; - params.episodeIds = options.episodeIds; - } - - const result = await runQuery(query, params); - 
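One detail of the summary fan-out above is worth noting: each per-space trigger attaches its own `.catch`, so a failing space resolves to an error object instead of rejecting. A minimal sketch of the pattern, using the same `triggerSpaceSummary` signature:

```
const summaryPromises = Array.from(affectedSpaces).map((spaceId) =>
  triggerSpaceSummary({
    userId,
    workspaceId,
    spaceId,
    triggerSource: "assignment",
  }).catch((error) => {
    // Capture the failure so sibling triggers keep running.
    logger.warn(`Failed to trigger summary for space ${spaceId}:`, { error });
    return { success: false, spaceId, error: error.message };
  }),
);

const results = await Promise.allSettled(summaryPromises);
const successful = results.filter((r) => r.status === "fulfilled").length;
```

Because every promise already swallows its rejection, `Promise.allSettled` will report all of them as fulfilled; the `rejected` count logged above can only be non-zero if the `.catch` handler itself throws.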
- return result.map((record) => { - const episode = record.get("e").properties; - return { - uuid: episode.uuid, - content: episode.content, - originalContent: episode.originalContent, - source: episode.source, - createdAt: new Date(episode.createdAt), - metadata: JSON.parse(episode.metadata || "{}"), - }; - }); -} - -async function processBatchAI( - episodes: EpisodeData[], - spaces: Space[], - userId: string, - mode: "new_space" | "episode", - newSpaceId?: string, - batchSize: number = 50, -): Promise<{ - processed: number; - assignments: number; - affectedSpaces?: string[]; -}> { - try { - // Create batches of episodes - const episodeBatches: EpisodeData[][] = []; - for (let i = 0; i < episodes.length; i += batchSize) { - episodeBatches.push(episodes.slice(i, i + batchSize)); - } - - logger.info( - `Creating ${episodeBatches.length} batch AI requests for ${episodes.length} episodes`, - ); - - // Create batch requests with prompts - const batchRequests = await Promise.all( - episodeBatches.map(async (batch, index) => { - const promptMessages = await createLLMPrompt( - batch, - spaces, - mode, - newSpaceId, - userId, - ); - const systemPrompt = - promptMessages.find((m) => m.role === "system")?.content || ""; - const userPrompt = - promptMessages.find((m) => m.role === "user")?.content || ""; - - return { - customId: `episode-space-assignment-${userId}-${mode}-${index}`, - messages: [{ role: "user" as const, content: userPrompt }], - systemPrompt, - }; - }), - ); - - // Submit batch to AI provider - const { batchId } = await createBatch({ - requests: batchRequests, - outputSchema: AssignmentResultSchema, - maxRetries: 3, - timeoutMs: 1200000, // 10 minutes timeout - }); - - logger.info(`Batch AI job created: ${batchId}`, { - userId, - mode, - batchRequests: batchRequests.length, - }); - - // Poll for completion with improved handling - const maxPollingTime = 1200000; // 13 minutes - const pollInterval = 5000; // 5 seconds - const startTime = Date.now(); - - let batch = await getBatch({ batchId }); - - while (batch.status === "processing" || batch.status === "pending") { - const elapsed = Date.now() - startTime; - - if (elapsed > maxPollingTime) { - logger.warn( - `Batch AI job timed out after ${elapsed}ms, processing partial results`, - { - batchId, - status: batch.status, - completed: batch.completedRequests, - total: batch.totalRequests, - failed: batch.failedRequests, - }, - ); - break; // Exit loop to process any available results - } - - logger.info(`Batch AI job status: ${batch.status}`, { - batchId, - completed: batch.completedRequests, - total: batch.totalRequests, - failed: batch.failedRequests, - elapsed: elapsed, - }); - - await new Promise((resolve) => setTimeout(resolve, pollInterval)); - batch = await getBatch({ batchId }); - } - - // Handle different completion scenarios - if (batch.status === "failed") { - logger.error(`Batch AI job failed completely`, { - batchId, - status: batch.status, - }); - throw new Error(`Batch AI job failed with status: ${batch.status}`); - } - - // Log final status regardless of completion state - logger.info(`Batch AI job processing finished`, { - batchId, - status: batch.status, - completed: batch.completedRequests, - total: batch.totalRequests, - failed: batch.failedRequests, - }); - - if (!batch.results || batch.results.length === 0) { - logger.warn(`No results returned from batch AI job ${batchId}`, { - status: batch.status, - completed: batch.completedRequests, - failed: batch.failedRequests, - }); - - // If we have no results but some 
requests failed, fall back to sequential processing - if (batch.failedRequests && batch.failedRequests > 0) { - logger.info( - `Falling back to sequential processing due to batch failures`, - ); - return await processBatch(episodes, spaces, userId, mode, newSpaceId); - } - - return { processed: episodes.length, assignments: 0 }; - } - - logger.info(`Processing batch results`, { - batchId, - status: batch.status, - resultsCount: batch.results.length, - totalRequests: batch.totalRequests, - completedRequests: batch.completedRequests, - failedRequests: batch.failedRequests, - }); - - // Process all batch results - let totalAssignments = 0; - const affectedSpaces = new Set(); - const confidenceThreshold = - mode === "new_space" - ? CONFIG.newSpaceMode.confidenceThreshold - : CONFIG.episodeMode.confidenceThreshold; - - for (const result of batch.results) { - if (result.error) { - logger.warn(`Batch AI request ${result.customId} failed:`, { - error: result.error, - }); - continue; - } - - if (!result.response) { - logger.warn(`No response from batch AI request ${result.customId}`); - continue; - } - - // Parse assignments from this batch result - let assignments: AssignmentResult[] = []; - try { - // Extract episode batch info from customId - const batchIndexMatch = result.customId.match(/-(\d+)$/); - const batchIndex = batchIndexMatch ? parseInt(batchIndexMatch[1]) : 0; - const episodeBatch = episodeBatches[batchIndex]; - - if (Array.isArray(result.response)) { - // Handle direct array response (from structured output) - assignments = result.response.map((a) => ({ - episodeId: a.episodeId, - spaceIds: a.addSpaceId || [], - confidence: a.confidence || 0.75, - reasoning: a.reasoning, - })); - } else if (typeof result.response === "string") { - // Parse from text response with tags (fallback for non-structured output) - assignments = parseLLMResponseWithTags( - result.response, - episodeBatch, - spaces, - ); - } else if (typeof result.response === "object" && result.response) { - // Handle object response that might contain the array directly - try { - let responseData = result.response; - if (responseData.results && Array.isArray(responseData.results)) { - responseData = responseData.results; - } - - if (Array.isArray(responseData)) { - assignments = responseData.map((a) => ({ - episodeId: a.episodeId, - spaceIds: a.addSpaceId || [], - confidence: a.confidence || 0.75, - reasoning: a.reasoning, - })); - } else { - // Fallback parsing - assignments = parseLLMResponse( - JSON.stringify(result.response), - episodeBatch, - spaces, - ); - } - } catch (parseError) { - logger.error( - `Error processing object response ${result.customId}:`, - { error: parseError }, - ); - assignments = []; - } - } else { - // Fallback parsing - assignments = parseLLMResponse( - JSON.stringify(result.response), - episodeBatch, - spaces, - ); - } - } catch (parseError) { - logger.error(`Error parsing batch result ${result.customId}:`, { - error: parseError, - }); - continue; - } - - // Group episodes by space for batch assignment - const spaceToEpisodes = new Map(); - - for (const assignment of assignments) { - if ( - assignment.spaceIds.length > 0 && - assignment.confidence >= confidenceThreshold - ) { - for (const spaceId of assignment.spaceIds) { - if (!spaceToEpisodes.has(spaceId)) { - spaceToEpisodes.set(spaceId, []); - } - spaceToEpisodes.get(spaceId)!.push(assignment.episodeId); - } - } - } - - // Apply batch assignments - one call per space - for (const [spaceId, episodeIds] of spaceToEpisodes) { - try { - const 
assignmentResult = await assignEpisodesToSpace( - episodeIds, - spaceId, - userId, - ); - - if (assignmentResult.success) { - totalAssignments += episodeIds.length; - affectedSpaces.add(spaceId); - logger.info( - `Batch AI assigned ${episodeIds.length} episodes to space ${spaceId}`, - { - episodeIds, - mode, - batchId: result.customId, - }, - ); - } - } catch (error) { - logger.warn( - `Failed to assign ${episodeIds.length} episodes to space ${spaceId}:`, - { error, episodeIds }, - ); - } - } - } - - // Log final batch processing results - logger.info(`Batch AI processing completed`, { - batchId, - totalEpisodes: episodes.length, - processedBatches: batch.results.length, - totalAssignments, - affectedSpaces: affectedSpaces.size, - completedRequests: batch.completedRequests, - failedRequests: batch.failedRequests || 0, - }); - - // If we have significant failures, consider fallback processing for remaining episodes - const failureRate = batch.failedRequests - ? batch.failedRequests / batch.totalRequests - : 0; - if (failureRate > 0.5) { - // If more than 50% failed - logger.warn( - `High failure rate (${Math.round(failureRate * 100)}%) in batch processing, consider reviewing prompts or input quality`, - ); - } - - return { - processed: episodes.length, - assignments: totalAssignments, - affectedSpaces: Array.from(affectedSpaces), - }; - } catch (error) { - logger.error("Error in Batch AI processing:", { error }); - throw error; - } -} - -async function processBatch( - episodes: EpisodeData[], - spaces: Space[], - userId: string, - mode: "new_space" | "episode", - newSpaceId?: string, -): Promise<{ - processed: number; - assignments: number; - affectedSpaces?: string[]; -}> { - try { - // Create the LLM prompt based on mode - const prompt = await createLLMPrompt( - episodes, - spaces, - mode, - newSpaceId, - userId, - ); - - // Episode-intent matching is MEDIUM complexity (semantic analysis with intent alignment) - let responseText = ""; - await makeModelCall( - false, - prompt, - (text: string) => { - responseText = text; - }, - undefined, - "high", - ); - - // Response text is now set by the callback - - // Parse LLM response - const assignments = parseLLMResponseWithTags( - responseText, - episodes, - spaces, - ); - - // Apply assignments - let totalAssignments = 0; - const affectedSpaces = new Set(); - const confidenceThreshold = - mode === "new_space" - ? 
CONFIG.newSpaceMode.confidenceThreshold - : CONFIG.episodeMode.confidenceThreshold; - - for (const assignment of assignments) { - if ( - assignment.spaceIds.length > 0 && - assignment.confidence >= confidenceThreshold - ) { - // Assign to each space individually to track metadata properly - for (const spaceId of assignment.spaceIds) { - try { - const result = await assignEpisodesToSpace( - [assignment.episodeId], - spaceId, - userId, - ); - - if (result.success) { - totalAssignments++; - affectedSpaces.add(spaceId); - - logger.info( - `LLM assigned episode ${assignment.episodeId} to space ${spaceId}`, - { - confidence: assignment.confidence, - reasoning: assignment.reasoning || "No reasoning", - mode, - } as Record, - ); - } - } catch (error) { - logger.warn( - `Failed to assign episode ${assignment.episodeId} to space ${spaceId}:`, - error as Record, - ); - } - } - } - } - - return { - processed: episodes.length, - assignments: totalAssignments, - affectedSpaces: Array.from(affectedSpaces), - }; - } catch (error) { - logger.error("Error processing batch:", error as Record); - return { processed: 0, assignments: 0, affectedSpaces: [] }; - } -} - -async function createLLMPrompt( - episodes: EpisodeData[], - spaces: Space[], - mode: "new_space" | "episode", - newSpaceId?: string, - userId?: string, -): Promise { - const episodesDescription = episodes - .map( - (ep) => - `ID: ${ep.uuid}\nCONTENT: ${ep.content}\nSOURCE: ${ep.source}\nMETADATA: ${JSON.stringify(ep.metadata)}`, - ) - .join("\n\n"); - - // Get enhanced space information with episode counts - const enhancedSpaces = await Promise.all( - spaces.map(async (space) => { - const currentCount = userId - ? await getSpaceEpisodeCount(space.id, userId) - : 0; - return { - ...space, - currentEpisodeCount: currentCount, - }; - }), - ); - - if (mode === "new_space" && newSpaceId) { - // Focus on the new space for assignment - const newSpace = enhancedSpaces.find((s) => s.id === newSpaceId); - if (!newSpace) { - throw new Error(`New space ${newSpaceId} not found`); - } - - return [ - { - role: "system", - content: `You are analyzing episodes for assignment to a newly created space based on the space's intent and purpose. - -CORE PRINCIPLE: Match episodes based on WHAT THE EPISODE IS FUNDAMENTALLY ABOUT (its primary subject), not just keyword overlap. - -STEP-BY-STEP FILTERING PROCESS: - -Step 1: IDENTIFY PRIMARY SUBJECT -Ask: "Who or what is this episode fundamentally about?" -- Is it about a specific person? (by name, or "I"/"my" = speaker) -- Is it about a system, tool, or organization? -- Is it about a project, event, or activity? -- Is it about a concept, topic, or idea? - -Step 2: HANDLE IMPLICIT SUBJECTS -- "I prefer..." or "My..." β†’ Subject is the SPEAKER (check episode source/metadata for identity) -- "User discussed..." or "Person X said..." β†’ Subject is that specific person -- "We decided..." β†’ Subject is the group/team/project being discussed -- If unclear, identify from context clues in the episode content - -Step 3: CHECK SUBJECT ALIGNMENT -Does the PRIMARY SUBJECT match what the space is about? -- Match the subject identity (right person/thing/concept?) -- Match the subject relationship (is episode ABOUT the subject or just MENTIONING it?) -- Match the intent purpose (does episode serve the space's purpose?) 
-- Check scope constraints: If space description includes scope requirements (e.g., "cross-context", "not app-specific", "broadly useful", "stable for 3+ months"), verify episode meets those constraints - -Step 4: DISTINGUISH SUBJECT vs META -Ask: "Is this episode ABOUT the subject itself, or ABOUT discussing/analyzing the subject?" -- ABOUT subject: Episode contains actual content related to subject -- META-discussion: Episode discusses how to handle/analyze/organize the subject -- Only assign if episode is ABOUT the subject, not meta-discussion - -Step 5: VERIFY CONFIDENCE -Only assign if confidence >= 0.75 based on: -- Subject identity clarity (is subject clearly identified?) -- Subject alignment strength (how well does it match space intent?) -- Content relevance (does episode content serve space purpose?) - -CRITICAL RULE: PRIMARY SUBJECT MATCHING -The episode's PRIMARY SUBJECT must match the space's target subject. -- If space is about Person A, episodes about Person B should NOT match (even if same topic) -- If space is about a specific concept, meta-discussions about that concept should NOT match -- If space is about actual behaviors/facts, process discussions about organizing those facts should NOT match - -EXAMPLES OF CORRECT FILTERING: - -Example 1 - Person Identity: -Space: "Alex's work preferences" -Episode A: "I prefer morning meetings and async updates" (speaker: Alex) → ASSIGN ✅ (primary subject: Alex's preferences) -Episode B: "Jordan prefers afternoon meetings" (speaker: System) → DO NOT ASSIGN ❌ (primary subject: Jordan, not Alex) - -Example 2 - Meta vs Actual: -Space: "Recipe collection" -Episode A: "My lasagna recipe: 3 layers pasta, béchamel, meat sauce..." → ASSIGN ✅ (primary subject: actual recipe) -Episode B: "We should organize recipes by cuisine type" → DO NOT ASSIGN ❌ (primary subject: organizing system, not recipe) - -Example 3 - Keyword Overlap Without Subject Match: -Space: "Home renovation project" -Episode A: "Installed new kitchen cabinets, chose oak wood" → ASSIGN ✅ (primary subject: home renovation) -Episode B: "Friend asked advice about their kitchen renovation" → DO NOT ASSIGN ❌ (primary subject: friend's project, not this home) - -Example 4 - Scope Constraints: -Space: "Personal identity and preferences (broadly useful across contexts, not app-specific)" -Episode A: "I prefer async communication and morning work hours" → ASSIGN ✅ (cross-context preference, broadly applicable) -Episode B: "Demonstrated knowledge of ProjectX technical stack" → DO NOT ASSIGN ❌ (work/project knowledge, not personal identity) - -RESPONSE FORMAT: -Provide your response inside <output> tags with a valid JSON array: - -<output> -[ - { - "episodeId": "episode-uuid", - "addSpaceId": ["${newSpaceId}"], - "confidence": 0.75, - "reasoning": "Brief explanation of intent match" - } -] -</output> - -IMPORTANT: If an episode doesn't align with the space's intent, use empty addSpaceId array: [] -Example: {"episodeId": "ep-123", "addSpaceId": [], "confidence": 0.0, "reasoning": "No intent alignment"}`, - }, - { - role: "user", - content: `NEW SPACE TO POPULATE: -Name: ${newSpace.name} -Intent/Purpose: ${newSpace.description || "No description"} -Current Episodes: ${newSpace.currentEpisodeCount} - -EPISODES TO EVALUATE: -${episodesDescription} - -ASSIGNMENT TASK: -For each episode above, follow the step-by-step process to determine if it should be assigned to this space. - -Remember: -1. Identify the PRIMARY SUBJECT of each episode (who/what is it about?) -2.
Check if that PRIMARY SUBJECT matches what this space is about -3. If the episode is ABOUT something else (even if it mentions related keywords), do NOT assign -4. If the episode is a META-discussion about the space's topic (not actual content), do NOT assign -5. Only assign if the episode's primary subject aligns with the space's intent AND confidence >= 0.75 - -Provide your analysis and assignments using the specified JSON format.`, - }, - ]; - } else { - // Episode mode - consider all spaces - const spacesDescription = enhancedSpaces - .map((space) => { - const spaceInfo = [ - `- ${space.name} (${space.id})`, - ` Intent/Purpose: ${space.description || "No description"}`, - ` Current Episodes: ${space.currentEpisodeCount}`, - ]; - - if (space.summary) { - spaceInfo.push(` Summary: ${space.summary}`); - } - - return spaceInfo.join("\n"); - }) - .join("\n\n"); - - return [ - { - role: "system", - content: `You are an expert at organizing episodes into semantic spaces based on the space's intent and purpose. - -CORE PRINCIPLE: Match episodes based on WHAT THE EPISODE IS FUNDAMENTALLY ABOUT (its primary subject), not just keyword overlap. - -STEP-BY-STEP FILTERING PROCESS: - -Step 1: IDENTIFY PRIMARY SUBJECT -Ask: "Who or what is this episode fundamentally about?" -- Is it about a specific person? (by name, or "I"/"my" = speaker) -- Is it about a system, tool, or organization? -- Is it about a project, event, or activity? -- Is it about a concept, topic, or idea? - -Step 2: HANDLE IMPLICIT SUBJECTS -- "I prefer..." or "My..." → Subject is the SPEAKER (check episode source/metadata for identity) -- "User discussed..." or "Person X said..." → Subject is that specific person -- "We decided..." → Subject is the group/team/project being discussed -- If unclear, identify from context clues in the episode content - -Step 3: CHECK SUBJECT ALIGNMENT WITH EACH SPACE -For each available space, does the episode's PRIMARY SUBJECT match what that space is about? -- Match the subject identity (right person/thing/concept?) -- Match the subject relationship (is episode ABOUT the subject or just MENTIONING it?) -- Match the intent purpose (does episode serve the space's purpose?) -- An episode can match multiple spaces if its primary subject serves multiple intents - -Step 4: DISTINGUISH SUBJECT vs META -Ask: "Is this episode ABOUT the subject itself, or ABOUT discussing/analyzing the subject?" -- ABOUT subject: Episode contains actual content related to subject -- META-discussion: Episode discusses how to handle/analyze/organize the subject -- Only assign if episode is ABOUT the subject, not meta-discussion - -Step 5: VERIFY CONFIDENCE -Only assign to a space if confidence >= 0.75 based on: -- Subject identity clarity (is subject clearly identified?) -- Subject alignment strength (how well does it match space intent?) -- Content relevance (does episode content serve space purpose?) - -Step 6: MULTI-SPACE ASSIGNMENT -- An episode can belong to multiple spaces if its primary subject serves multiple intents -- Each space assignment should meet the >= 0.75 confidence threshold independently -- If no spaces match, use empty addSpaceId: [] - -CRITICAL RULE: PRIMARY SUBJECT MATCHING -The episode's PRIMARY SUBJECT must match the space's target subject.
-- If space is about Person A, episodes about Person B should NOT match (even if same topic) -- If space is about a specific concept, meta-discussions about that concept should NOT match -- If space is about actual behaviors/facts, process discussions about organizing those facts should NOT match - -EXAMPLES OF CORRECT FILTERING: - -Example 1 - Person Identity: -Space: "Alex's work preferences" -Episode A: "I prefer morning meetings and async updates" (speaker: Alex) → ASSIGN ✅ (primary subject: Alex's preferences) -Episode B: "Jordan prefers afternoon meetings" (speaker: System) → DO NOT ASSIGN ❌ (primary subject: Jordan, not Alex) - -Example 2 - Meta vs Actual: -Space: "Recipe collection" -Episode A: "My lasagna recipe: 3 layers pasta, béchamel, meat sauce..." → ASSIGN ✅ (primary subject: actual recipe) -Episode B: "We should organize recipes by cuisine type" → DO NOT ASSIGN ❌ (primary subject: organizing system, not recipe) - -Example 3 - Keyword Overlap Without Subject Match: -Space: "Home renovation project" -Episode A: "Installed new kitchen cabinets, chose oak wood" → ASSIGN ✅ (primary subject: home renovation) -Episode B: "Friend asked advice about their kitchen renovation" → DO NOT ASSIGN ❌ (primary subject: friend's project, not this home) - -Example 4 - Scope Constraints: -Space: "Personal identity and preferences (broadly useful across contexts, not app-specific)" -Episode A: "I prefer async communication and morning work hours" → ASSIGN ✅ (cross-context preference, broadly applicable) -Episode B: "I format task titles as {verb}: {title} in TaskApp" → DO NOT ASSIGN ❌ (app-specific behavior, fails "not app-specific" constraint) -Episode C: "Demonstrated knowledge of ProjectX technical stack" → DO NOT ASSIGN ❌ (work/project knowledge, not personal identity) - -RESPONSE FORMAT: -Provide your response inside <output> tags with a valid JSON array: - -<output> -[ - { - "episodeId": "episode-uuid", - "addSpaceId": ["space-uuid1", "space-uuid2"], - "confidence": 0.75, - "reasoning": "Brief explanation of intent match" - } -] -</output> - -IMPORTANT: If no spaces' intents align with an episode, use empty addSpaceId array: [] -Example: {"episodeId": "ep-123", "addSpaceId": [], "confidence": 0.0, "reasoning": "No matching space intent"}`, - }, - { - role: "user", - content: `AVAILABLE SPACES (with their intents/purposes): -${spacesDescription} - -EPISODES TO ORGANIZE: -${episodesDescription} - -ASSIGNMENT TASK: -For each episode above, follow the step-by-step process to determine which space(s) it should be assigned to. - -Remember: -1. Identify the PRIMARY SUBJECT of each episode (who/what is it about?) -2. Check if that PRIMARY SUBJECT matches what each space is about -3. If the episode is ABOUT something else (even if it mentions related keywords), do NOT assign to that space -4. If the episode is a META-discussion about a space's topic (not actual content), do NOT assign to that space -5. An episode can be assigned to multiple spaces if its primary subject serves multiple intents -6.
Only assign if the episode's primary subject aligns with the space's intent AND confidence >= 0.75 for that space - -Provide your analysis and assignments using the specified JSON format.`, - }, - ]; - } -} - -function parseLLMResponseWithTags( - response: string, - episodes: EpisodeData[], - spaces: Space[], -): AssignmentResult[] { - try { - // Extract content from <output> tags - const outputMatch = response.match(/<output>([\s\S]*?)<\/output>/); - if (!outputMatch) { - logger.warn( - "No <output> tags found in LLM response, falling back to full response parsing", - ); - return parseLLMResponse(response, episodes, spaces); - } - - const jsonContent = outputMatch[1].trim(); - const parsed = JSON.parse(jsonContent); - - if (!Array.isArray(parsed)) { - logger.warn( - "Invalid LLM response format - expected array in <output> tags", - ); - return []; - } - - const validSpaceIds = new Set(spaces.map((s) => s.id)); - const validEpisodeIds = new Set(episodes.map((e) => e.uuid)); - - return parsed - .filter((assignment: any) => { - // Validate assignment structure - if ( - !assignment.episodeId || - !validEpisodeIds.has(assignment.episodeId) - ) { - return false; - } - - // Validate spaceIds array - if (!assignment.addSpaceId || !Array.isArray(assignment.addSpaceId)) { - assignment.addSpaceId = []; - } - - // Filter out invalid space IDs - assignment.addSpaceId = assignment.addSpaceId.filter( - (spaceId: string) => validSpaceIds.has(spaceId), - ); - - return true; - }) - .map((assignment: any) => ({ - episodeId: assignment.episodeId, - spaceIds: assignment.addSpaceId, - confidence: assignment.confidence || 0.75, - reasoning: assignment.reasoning, - })); - } catch (error) { - logger.error( - "Error parsing LLM response with tags:", - error as Record<string, unknown>, - ); - logger.debug("Raw LLM response:", { response } as Record<string, unknown>); - // Fallback to regular parsing - return parseLLMResponse(response, episodes, spaces); - } -} - -function parseLLMResponse( - response: string, - episodes: EpisodeData[], - spaces: Space[], -): AssignmentResult[] { - try { - // Clean the response - remove any markdown formatting - const cleanedResponse = response - .replace(/```json\n?/g, "") - .replace(/```\n?/g, "") - .trim(); - - const parsed = JSON.parse(cleanedResponse); - - if (!parsed.assignments || !Array.isArray(parsed.assignments)) { - logger.warn("Invalid LLM response format - no assignments array"); - return []; - } - - const validSpaceIds = new Set(spaces.map((s) => s.id)); - const validEpisodeIds = new Set(episodes.map((e) => e.uuid)); - - return parsed.assignments - .filter((assignment: any) => { - // Validate assignment structure - if ( - !assignment.episodeId || - !validEpisodeIds.has(assignment.episodeId) - ) { - return false; - } - - if (!assignment.spaceIds || !Array.isArray(assignment.spaceIds)) { - return false; - } - - // Filter out invalid space IDs - assignment.spaceIds = assignment.spaceIds.filter((spaceId: string) => - validSpaceIds.has(spaceId), - ); - - return true; - }) - .map((assignment: any) => ({ - episodeId: assignment.episodeId, - spaceIds: assignment.spaceIds, - confidence: assignment.confidence || 0.75, - reasoning: assignment.reasoning, - })); - } catch (error) { - logger.error( - "Error parsing LLM response:", - error as Record<string, unknown>, - ); - logger.debug("Raw LLM response:", { response } as Record<string, unknown>); - return []; - } -} - // Helper function to trigger the task export async function triggerSpaceAssignment(payload: SpaceAssignmentPayload) { return await spaceAssignmentTask.trigger(payload, { diff --git 
a/apps/webapp/app/trigger/spaces/space-summary.ts b/apps/webapp/app/trigger/spaces/space-summary.ts index ceafbeb..a21547a 100644 --- a/apps/webapp/app/trigger/spaces/space-summary.ts +++ b/apps/webapp/app/trigger/spaces/space-summary.ts @@ -1,62 +1,11 @@ import { queue, task } from "@trigger.dev/sdk/v3"; import { logger } from "~/services/logger.service"; -import { SpaceService } from "~/services/space.server"; -import { makeModelCall } from "~/lib/model.server"; -import { runQuery } from "~/lib/neo4j.server"; -import { updateSpaceStatus, SPACE_STATUS } from "../utils/space-status"; -import type { CoreMessage } from "ai"; -import { z } from "zod"; -import { triggerSpacePattern } from "./space-pattern"; -import { getSpace, updateSpace } from "../utils/space-utils"; +import { + processSpaceSummary, + type SpaceSummaryPayload, +} from "~/jobs/spaces/space-summary.logic"; -import { EpisodeType } from "@core/types"; -import { getSpaceEpisodeCount } from "~/services/graphModels/space"; -import { addToQueue } from "~/lib/ingest.server"; - -interface SpaceSummaryPayload { - userId: string; - workspaceId: string; - spaceId: string; // Single space only - triggerSource?: "assignment" | "manual" | "scheduled"; -} - -interface SpaceEpisodeData { - uuid: string; - content: string; - originalContent: string; - source: string; - createdAt: Date; - validAt: Date; - metadata: any; - sessionId: string | null; -} - -interface SpaceSummaryData { - spaceId: string; - spaceName: string; - spaceDescription?: string; - contextCount: number; - summary: string; - keyEntities: string[]; - themes: string[]; - confidence: number; - lastUpdated: Date; - isIncremental: boolean; -} - -// Zod schema for LLM response validation -const SummaryResultSchema = z.object({ - summary: z.string(), - keyEntities: z.array(z.string()), - themes: z.array(z.string()), - confidence: z.number().min(0).max(1), -}); - -const CONFIG = { - maxEpisodesForSummary: 20, // Limit episodes for performance - minEpisodesForSummary: 1, // Minimum episodes to generate summary - summaryEpisodeThreshold: 5, // Minimum new episodes required to trigger summary (configurable) -}; +export type { SpaceSummaryPayload }; export const spaceSummaryQueue = queue({ name: "space-summary-queue", @@ -67,735 +16,17 @@ export const spaceSummaryTask = task({ id: "space-summary", queue: spaceSummaryQueue, run: async (payload: SpaceSummaryPayload) => { - const { userId, workspaceId, spaceId, triggerSource = "manual" } = payload; - - logger.info(`Starting space summary generation`, { - userId, - workspaceId, - spaceId, - triggerSource, + logger.info(`[Trigger.dev] Starting space summary task`, { + userId: payload.userId, + spaceId: payload.spaceId, + triggerSource: payload.triggerSource, }); - try { - // Update status to processing - await updateSpaceStatus(spaceId, SPACE_STATUS.PROCESSING, { - userId, - operation: "space-summary", - metadata: { triggerSource, phase: "start_summary" }, - }); - - // Generate summary for the single space - const summaryResult = await generateSpaceSummary( - spaceId, - userId, - triggerSource, - ); - - if (summaryResult) { - // Store the summary - await storeSummary(summaryResult); - - // Update status to ready after successful completion - await updateSpaceStatus(spaceId, SPACE_STATUS.READY, { - userId, - operation: "space-summary", - metadata: { - triggerSource, - phase: "completed_summary", - contextCount: summaryResult.contextCount, - confidence: summaryResult.confidence, - }, - }); - - logger.info(`Generated summary for space 
${spaceId}`, { - statementCount: summaryResult.contextCount, - confidence: summaryResult.confidence, - themes: summaryResult.themes.length, - triggerSource, - }); - - return { - success: true, - spaceId, - triggerSource, - summary: { - statementCount: summaryResult.contextCount, - confidence: summaryResult.confidence, - themesCount: summaryResult.themes.length, - }, - }; - } else { - // No summary generated - this could be due to insufficient episodes or no new episodes - // This is not an error state, so update status to ready - await updateSpaceStatus(spaceId, SPACE_STATUS.READY, { - userId, - operation: "space-summary", - metadata: { - triggerSource, - phase: "no_summary_needed", - reason: "Insufficient episodes or no new episodes to summarize", - }, - }); - - logger.info( - `No summary generated for space ${spaceId} - insufficient or no new episodes`, - ); - return { - success: true, - spaceId, - triggerSource, - summary: null, - reason: "No episodes to summarize", - }; - } - } catch (error) { - // Update status to error on exception - try { - await updateSpaceStatus(spaceId, SPACE_STATUS.ERROR, { - userId, - operation: "space-summary", - metadata: { - triggerSource, - phase: "exception", - error: error instanceof Error ? error.message : "Unknown error", - }, - }); - } catch (statusError) { - logger.warn(`Failed to update status to error for space ${spaceId}`, { - statusError, - }); - } - - logger.error( - `Error in space summary generation for space ${spaceId}:`, - error as Record<string, unknown>, - ); - throw error; - } + // Use common business logic + return await processSpaceSummary(payload); }, }); -async function generateSpaceSummary( - spaceId: string, - userId: string, - triggerSource?: "assignment" | "manual" | "scheduled", -): Promise<SpaceSummaryData | null> { - try { - // 1. Get space details - const spaceService = new SpaceService(); - const space = await spaceService.getSpace(spaceId, userId); - - if (!space) { - logger.warn(`Space ${spaceId} not found for user ${userId}`); - return null; - } - - // 2. Check episode count threshold (skip for manual triggers) - if (triggerSource !== "manual") { - const currentEpisodeCount = await getSpaceEpisodeCount(spaceId, userId); - const lastSummaryEpisodeCount = space.contextCount || 0; - const episodeDifference = currentEpisodeCount - lastSummaryEpisodeCount; - - if ( - episodeDifference < CONFIG.summaryEpisodeThreshold || - lastSummaryEpisodeCount !== 0 - ) { - logger.info( - `Skipping summary generation for space ${spaceId}: only ${episodeDifference} new episodes (threshold: ${CONFIG.summaryEpisodeThreshold})`, - { - currentEpisodeCount, - lastSummaryEpisodeCount, - episodeDifference, - threshold: CONFIG.summaryEpisodeThreshold, - }, - ); - return null; - } - - logger.info( - `Proceeding with summary generation for space ${spaceId}: ${episodeDifference} new episodes (threshold: ${CONFIG.summaryEpisodeThreshold})`, - { - currentEpisodeCount, - lastSummaryEpisodeCount, - episodeDifference, - }, - ); - } - - // 2. Check for existing summary - const existingSummary = await getExistingSummary(spaceId); - const isIncremental = existingSummary !== null; - - // 3. Get episodes (all or new ones based on existing summary) - const episodes = await getSpaceEpisodes( - spaceId, - userId, - isIncremental ?
existingSummary?.lastUpdated : undefined, - ); - - // Handle case where no new episodes exist for incremental update - if (isIncremental && episodes.length === 0) { - logger.info( - `No new episodes found for space ${spaceId}, skipping summary update`, - ); - return null; - } - - // Check minimum episode requirement for new summaries only - if (!isIncremental && episodes.length < CONFIG.minEpisodesForSummary) { - logger.info( - `Space ${spaceId} has insufficient episodes (${episodes.length}) for new summary`, - ); - return null; - } - - // 4. Process episodes using unified approach - let summaryResult; - - if (episodes.length > CONFIG.maxEpisodesForSummary) { - logger.info( - `Large space detected (${episodes.length} episodes). Processing in batches.`, - ); - - // Process in batches, each building on previous result - const batches: SpaceEpisodeData[][] = []; - for (let i = 0; i < episodes.length; i += CONFIG.maxEpisodesForSummary) { - batches.push(episodes.slice(i, i + CONFIG.maxEpisodesForSummary)); - } - - let currentSummary = existingSummary?.summary || null; - let currentThemes = existingSummary?.themes || []; - let cumulativeConfidence = 0; - - for (const [batchIndex, batch] of batches.entries()) { - logger.info( - `Processing batch ${batchIndex + 1}/${batches.length} with ${batch.length} episodes`, - ); - - const batchResult = await generateUnifiedSummary( - space.name, - space.description as string, - batch, - currentSummary, - currentThemes, - ); - - if (batchResult) { - currentSummary = batchResult.summary; - currentThemes = batchResult.themes; - cumulativeConfidence += batchResult.confidence; - } else { - logger.warn(`Failed to process batch ${batchIndex + 1}`); - } - - // Small delay between batches - if (batchIndex < batches.length - 1) { - await new Promise((resolve) => setTimeout(resolve, 500)); - } - } - - summaryResult = currentSummary - ? 
{ - summary: currentSummary, - themes: currentThemes, - confidence: Math.min(cumulativeConfidence / batches.length, 1.0), - } - : null; - } else { - logger.info( - `Processing ${episodes.length} episodes with unified approach`, - ); - - // Use unified approach for smaller spaces - summaryResult = await generateUnifiedSummary( - space.name, - space.description as string, - episodes, - existingSummary?.summary || null, - existingSummary?.themes || [], - ); - } - - if (!summaryResult) { - logger.warn(`Failed to generate LLM summary for space ${spaceId}`); - return null; - } - - // Get the actual current counts from Neo4j - const currentEpisodeCount = await getSpaceEpisodeCount(spaceId, userId); - - return { - spaceId: space.uuid, - spaceName: space.name, - spaceDescription: space.description as string, - contextCount: currentEpisodeCount, - summary: summaryResult.summary, - keyEntities: summaryResult.keyEntities || [], - themes: summaryResult.themes, - confidence: summaryResult.confidence, - lastUpdated: new Date(), - isIncremental, - }; - } catch (error) { - logger.error( - `Error generating summary for space ${spaceId}:`, - error as Record<string, unknown>, - ); - return null; - } -} - -async function generateUnifiedSummary( - spaceName: string, - spaceDescription: string | undefined, - episodes: SpaceEpisodeData[], - previousSummary: string | null = null, - previousThemes: string[] = [], -): Promise<{ - summary: string; - themes: string[]; - confidence: number; - keyEntities?: string[]; -} | null> { - try { - const prompt = createUnifiedSummaryPrompt( - spaceName, - spaceDescription, - episodes, - previousSummary, - previousThemes, - ); - - // Space summary generation requires HIGH complexity (creative synthesis, narrative generation) - let responseText = ""; - await makeModelCall( - false, - prompt, - (text: string) => { - responseText = text; - }, - undefined, - "high", - ); - - return parseSummaryResponse(responseText); - } catch (error) { - logger.error( - "Error generating unified summary:", - error as Record<string, unknown>, - ); - return null; - } -} - -function createUnifiedSummaryPrompt( - spaceName: string, - spaceDescription: string | undefined, - episodes: SpaceEpisodeData[], - previousSummary: string | null, - previousThemes: string[], -): CoreMessage[] { - // If there are no episodes and no previous summary, we cannot generate a meaningful summary - if (episodes.length === 0 && previousSummary === null) { - throw new Error( - "Cannot generate summary without episodes or existing summary", - ); - } - - const episodesText = episodes - .map( - (episode) => - `- ${episode.content} (Source: ${episode.source}, Session: ${episode.sessionId || "N/A"})`, - ) - .join("\n"); - - // Extract key entities and themes from episode content - const contentWords = episodes - .map((ep) => ep.content.toLowerCase()) - .join(" ") - .split(/\s+/) - .filter((word) => word.length > 3); - - const wordFrequency = new Map<string, number>(); - contentWords.forEach((word) => { - wordFrequency.set(word, (wordFrequency.get(word) || 0) + 1); - }); - - const topEntities = Array.from(wordFrequency.entries()) - .sort(([, a], [, b]) => b - a) - .slice(0, 10) - .map(([word]) => word); - - const isUpdate = previousSummary !== null; - - return [ - { - role: "system", - content: `You are an expert at analyzing and summarizing episodes within semantic spaces based on the space's intent and purpose. Your task is to ${isUpdate ? "update an existing summary by integrating new episodes" : "create a comprehensive summary of episodes"}. - -CRITICAL RULES: -1.
Base your summary ONLY on insights derived from the actual content/episodes provided -2. Use the space's INTENT/PURPOSE (from description) to guide what to summarize and how to organize it -3. Write in a factual, neutral tone - avoid promotional language ("pivotal", "invaluable", "cutting-edge") -4. Be specific and concrete - reference actual content, patterns, and insights found in the episodes -5. If episodes are insufficient for meaningful insights, state that more data is needed - -INTENT-DRIVEN SUMMARIZATION: -Your summary should SERVE the space's intended purpose. Examples: -- "Learning React" → Summarize React concepts, patterns, techniques learned -- "Project X Updates" → Summarize progress, decisions, blockers, next steps -- "Health Tracking" → Summarize metrics, trends, observations, insights -- "Guidelines for React" → Extract actionable patterns, best practices, rules -- "Evolution of design thinking" → Track how thinking changed over time, decision points -The intent defines WHY this space exists - organize content to serve that purpose. - -INSTRUCTIONS: -${ - isUpdate - ? `1. Review the existing summary and themes carefully -2. Analyze the new episodes for patterns and insights that align with the space's intent -3. Identify connecting points between existing knowledge and new episodes -4. Update the summary to seamlessly integrate new information while preserving valuable existing insights -5. Evolve themes by adding new ones or refining existing ones based on the space's purpose -6. Organize the summary to serve the space's intended use case` - : `1. Analyze the semantic content and relationships within the episodes -2. Identify topics/sections that align with the space's INTENT and PURPOSE -3. Create a coherent summary that serves the space's intended use case -4. Organize the summary based on the space's purpose (not generic frequency-based themes)` -} -${isUpdate ? "7" : "5"}. Assess your confidence in the ${isUpdate ? "updated" : ""} summary quality (0.0-1.0) - -INTENT-ALIGNED ORGANIZATION: -- Organize sections based on what serves the space's purpose -- Topics don't need minimum episode counts - relevance to intent matters most -- Each section should provide value aligned with the space's intended use -- For "guidelines" spaces: focus on actionable patterns -- For "tracking" spaces: focus on temporal patterns and changes -- For "learning" spaces: focus on concepts and insights gained -- Let the space's intent drive the structure, not rigid rules - -${ - isUpdate - ? `CONNECTION FOCUS: -- Entity relationships that span across batches/time -- Theme evolution and expansion -- Temporal patterns and progressions -- Contradictions or confirmations of existing insights -- New insights that complement existing knowledge` - : "" -} - -RESPONSE FORMAT: -Provide your response inside <output> tags with valid JSON. Include both HTML summary and markdown format. - -<output> -{ - "summary": "${isUpdate ? "Updated HTML summary that integrates new insights with existing knowledge. Write factually about what the statements reveal - mention specific entities, relationships, and patterns found in the data. Avoid marketing language. Use HTML tags for structure." : "Factual HTML summary based on patterns found in the statements. Report what the data actually shows - specific entities, relationships, frequencies, and concrete insights. Avoid promotional language. Use HTML tags like <h3>, <p>, <ul>, <li> for structure. Keep it concise and evidence-based."}", - "keyEntities": ["entity1", "entity2", "entity3"], - "themes": ["${isUpdate ? 'updated_theme1", "new_theme2", "evolved_theme3' : 'theme1", "theme2", "theme3'}"], - "confidence": 0.85 -} -</output> - -JSON FORMATTING RULES: -- HTML content in summary field is allowed and encouraged -- Escape quotes within strings as \" -- Escape HTML angle brackets if needed: &lt; and &gt; -- Use proper HTML tags for structure: <h3>, <p>, <ul>, <li>, <strong>, <em>, etc. -- HTML content should be well-formed and semantic - -GUIDELINES: -${ - isUpdate - ? `- Preserve valuable insights from existing summary -- Integrate new information by highlighting connections -- Themes should evolve naturally, don't replace wholesale -- The updated summary should read as a coherent whole -- Make the summary user-friendly and explain what value this space provides` - : `- Report only what the episodes actually reveal - be specific and concrete -- Cite actual content and patterns found in the episodes -- Avoid generic descriptions that could apply to any space -- Use neutral, factual language - no "comprehensive", "robust", "cutting-edge" etc. -- Themes must be backed by at least 3 supporting episodes with clear evidence -- Better to have fewer, well-supported themes than many weak ones -- Confidence should reflect actual data quality and coverage, not aspirational goals` -}`, - }, - { - role: "user", - content: `SPACE INFORMATION: -Name: "${spaceName}" -Intent/Purpose: ${spaceDescription || "No specific intent provided - organize naturally based on content"} - -${ - isUpdate - ? `EXISTING SUMMARY: -${previousSummary} - -EXISTING THEMES: -${previousThemes.join(", ")} - -NEW EPISODES TO INTEGRATE (${episodes.length} episodes):` - : `EPISODES IN THIS SPACE (${episodes.length} episodes):` -} -${episodesText} - -${ - episodes.length > 0 - ? `TOP WORDS BY FREQUENCY: -${topEntities.join(", ")}` - : "" -} - -${ - isUpdate - ? "Please identify connections between the existing summary and new episodes, then update the summary to integrate the new insights coherently. Organize the summary to SERVE the space's intent/purpose. Remember: only summarize insights from the actual episode content." - : "Please analyze the episodes and provide a comprehensive summary that SERVES the space's intent/purpose. Organize sections based on what would be most valuable for this space's intended use case. If the intent is unclear, organize naturally based on content patterns. Only summarize insights from actual episode content."
-}`, - }, - ]; -} - -async function getExistingSummary(spaceId: string): Promise<{ - summary: string; - themes: string[]; - lastUpdated: Date; - contextCount: number; -} | null> { - try { - const existingSummary = await getSpace(spaceId); - - if (existingSummary?.summary) { - return { - summary: existingSummary.summary, - themes: existingSummary.themes, - lastUpdated: existingSummary.summaryGeneratedAt || new Date(), - contextCount: existingSummary.contextCount || 0, - }; - } - - return null; - } catch (error) { - logger.warn(`Failed to get existing summary for space ${spaceId}:`, { - error, - }); - return null; - } -} - -async function getSpaceEpisodes( - spaceId: string, - userId: string, - sinceDate?: Date, -): Promise<SpaceEpisodeData[]> { - // Query episodes directly using Space-[:HAS_EPISODE]->Episode relationships - const params: any = { spaceId, userId }; - - let dateCondition = ""; - if (sinceDate) { - dateCondition = "AND e.createdAt > $sinceDate"; - params.sinceDate = sinceDate.toISOString(); - } - - const query = ` - MATCH (space:Space {uuid: $spaceId, userId: $userId})-[:HAS_EPISODE]->(e:Episode {userId: $userId}) - WHERE e IS NOT NULL ${dateCondition} - RETURN DISTINCT e - ORDER BY e.createdAt DESC - `; - - const result = await runQuery(query, params); - - return result.map((record) => { - const episode = record.get("e").properties; - return { - uuid: episode.uuid, - content: episode.content, - originalContent: episode.originalContent, - source: episode.source, - createdAt: new Date(episode.createdAt), - validAt: new Date(episode.validAt), - metadata: JSON.parse(episode.metadata || "{}"), - sessionId: episode.sessionId, - }; - }); -} - -function parseSummaryResponse(response: string): { - summary: string; - themes: string[]; - confidence: number; - keyEntities?: string[]; -} | null { - try { - // Extract content from <output> tags - const outputMatch = response.match(/<output>([\s\S]*?)<\/output>/); - if (!outputMatch) { - logger.warn("No <output> tags found in LLM summary response"); - logger.debug("Full LLM response:", { response }); - return null; - } - - let jsonContent = outputMatch[1].trim(); - - let parsed; - try { - parsed = JSON.parse(jsonContent); - } catch (jsonError) { - logger.warn("JSON parsing failed, attempting cleanup and retry", { - originalError: jsonError, - jsonContent: jsonContent.substring(0, 500) + "...", // Log first 500 chars - }); - - // More aggressive cleanup for malformed JSON - jsonContent = jsonContent - .replace(/([^\\])"/g, '$1\\"') // Escape unescaped quotes - .replace(/^"/g, '\\"') // Escape quotes at start - .replace(/\\\\"/g, '\\"'); // Fix double-escaped quotes - - parsed = JSON.parse(jsonContent); - } - - // Validate the response structure - const validationResult = SummaryResultSchema.safeParse(parsed); - if (!validationResult.success) { - logger.warn("Invalid LLM summary response format:", { - error: validationResult.error, - parsedData: parsed, - }); - return null; - } - - return validationResult.data; - } catch (error) { - logger.error( - "Error parsing LLM summary response:", - error as Record<string, unknown>, - ); - logger.debug("Failed response content:", { response }); - return null; - } -} - -async function storeSummary(summaryData: SpaceSummaryData): Promise<void> { - try { - // Store in PostgreSQL for API access and persistence - await updateSpace(summaryData); - - // Also store in Neo4j for graph-based queries - const query = ` - MATCH (space:Space {uuid: $spaceId}) - SET space.summary = $summary, - space.keyEntities = $keyEntities, - space.themes = $themes, - space.summaryConfidence =
$confidence, - space.summaryContextCount = $contextCount, - space.summaryLastUpdated = datetime($lastUpdated) - RETURN space - `; - - await runQuery(query, { - spaceId: summaryData.spaceId, - summary: summaryData.summary, - keyEntities: summaryData.keyEntities, - themes: summaryData.themes, - confidence: summaryData.confidence, - contextCount: summaryData.contextCount, - lastUpdated: summaryData.lastUpdated.toISOString(), - }); - - logger.info(`Stored summary for space ${summaryData.spaceId}`, { - themes: summaryData.themes.length, - keyEntities: summaryData.keyEntities.length, - confidence: summaryData.confidence, - }); - } catch (error) { - logger.error( - `Error storing summary for space ${summaryData.spaceId}:`, - error as Record<string, unknown>, - ); - throw error; - } -} - -/** - * Process space summary sequentially: ingest document then trigger patterns - */ -async function processSpaceSummarySequentially({ - userId, - workspaceId, - spaceId, - spaceName, - summaryContent, - triggerSource, -}: { - userId: string; - workspaceId: string; - spaceId: string; - spaceName: string; - summaryContent: string; - triggerSource: - | "summary_complete" - | "manual" - | "assignment" - | "scheduled" - | "new_space" - | "growth_threshold" - | "ingestion_complete"; -}): Promise<void> { - // Step 1: Ingest summary as document synchronously - await ingestSpaceSummaryDocument(spaceId, userId, spaceName, summaryContent); - - logger.info( - `Successfully ingested space summary document for space ${spaceId}`, - ); - - // Step 2: Now trigger space patterns (patterns will have access to the ingested summary) - await triggerSpacePattern({ - userId, - workspaceId, - spaceId, - triggerSource, - }); - - logger.info( - `Sequential processing completed for space ${spaceId}: summary ingested → patterns triggered`, - ); -} - -/** - * Ingest space summary as document synchronously - */ -async function ingestSpaceSummaryDocument( - spaceId: string, - userId: string, - spaceName: string, - summaryContent: string, -): Promise<void> { - // Create the ingest body - const ingestBody = { - episodeBody: summaryContent, - referenceTime: new Date().toISOString(), - metadata: { - documentType: "space_summary", - spaceId, - spaceName, - generatedAt: new Date().toISOString(), - }, - source: "space", - spaceId, - sessionId: spaceId, - type: EpisodeType.DOCUMENT, - }; - - // Add to queue - await addToQueue(ingestBody, userId); - - logger.info(`Queued space summary for synchronous ingestion`); - - return; -} - // Helper function to trigger the task export async function triggerSpaceSummary(payload: SpaceSummaryPayload) { return await spaceSummaryTask.trigger(payload, { diff --git a/apps/webapp/app/trigger/utils/space-utils.ts b/apps/webapp/app/trigger/utils/space-utils.ts index 8144c56..fee6c27 100644 --- a/apps/webapp/app/trigger/utils/space-utils.ts +++ b/apps/webapp/app/trigger/utils/space-utils.ts @@ -1,4 +1,3 @@ -import { type SpacePattern } from "@core/types"; import { prisma } from "./prisma"; export const getSpace = async (spaceId: string) => { @@ -11,22 +10,6 @@ export const getSpace = async (spaceId: string) => { return space; }; -export const createSpacePattern = async ( - spaceId: string, - allPatterns: Omit< - SpacePattern, - "id" | "createdAt" | "updatedAt" | "spaceId" - >[], -) => { - return await prisma.spacePattern.createMany({ - data: allPatterns.map((pattern) => ({ - ...pattern, - spaceId, - userConfirmed: pattern.userConfirmed as any, // Temporary cast until Prisma client is regenerated - })), - }); -}; - -export const updateSpace = async
(summaryData: { spaceId: string; summary: string; @@ -41,7 +24,7 @@ export const updateSpace = async (summaryData: { summary: summaryData.summary, themes: summaryData.themes, contextCount: summaryData.contextCount, - summaryGeneratedAt: new Date().toISOString() + summaryGeneratedAt: new Date().toISOString(), }, }); }; diff --git a/apps/webapp/app/utils/mcp/memory.ts b/apps/webapp/app/utils/mcp/memory.ts index 45a4f79..acab580 100644 --- a/apps/webapp/app/utils/mcp/memory.ts +++ b/apps/webapp/app/utils/mcp/memory.ts @@ -1,3 +1,4 @@ +import { randomUUID } from "node:crypto"; import { EpisodeTypeEnum } from "@core/types"; import { addToQueue } from "~/lib/ingest.server"; import { logger } from "~/services/logger.service"; @@ -150,6 +151,20 @@ export const memoryTools = [ }, }, }, + { + name: "get_session_id", + description: + "Get a new session ID for the MCP connection. USE THIS TOOL: When you need a session ID and don't have one yet. This generates a unique UUID to identify your MCP session. IMPORTANT: If any other tool requires a sessionId parameter and you don't have one, call this tool first to get a session ID. Returns: A UUID string to use as sessionId.", + inputSchema: { + type: "object", + properties: { + new: { + type: "boolean", + description: "Set to true to get a new sessionId.", + }, + }, + }, + }, { name: "get_integrations", description: @@ -162,7 +177,7 @@ export const memoryTools = [ { name: "get_integration_actions", description: - "Get list of actions available for a specific integration. USE THIS TOOL: After get_integrations to see what operations you can perform. For example, GitHub integration has actions like 'get_pr', 'get_issues', 'create_issue'. HOW TO USE: Provide the integrationSlug from get_integrations (like 'github', 'linear', 'slack'). Returns: Array of actions with name, description, and inputSchema for each.", + "Get list of actions available for a specific integration. USE THIS TOOL: After get_integrations to see what operations you can perform. For example, GitHub integration has actions like 'get_pr', 'get_issues', 'create_issue'. HOW TO USE: Provide the integrationSlug from get_integrations (like 'github', 'linear', 'slack').", inputSchema: { type: "object", properties: { @@ -178,7 +193,7 @@ export const memoryTools = [ { name: "execute_integration_action", description: - "Execute an action on an integration (fetch GitHub PR, create Linear issue, send Slack message, etc.). USE THIS TOOL: After using get_integration_actions to see available actions. HOW TO USE: 1) Set integrationSlug (like 'github'), 2) Set action name (like 'get_pr'), 3) Set arguments object with required parameters from the action's inputSchema. Returns: Result of the action execution.", + "Execute an action on an integration (fetch GitHub PR, create Linear issue, send Slack message, etc.). USE THIS TOOL: After using get_integration_actions to see available actions. 
HOW TO USE: 1) Set integrationSlug (like 'github'), 2) Set action name (like 'get_pr'), 3) Set arguments object with required parameters from the action's inputSchema.", inputSchema: { type: "object", properties: { @@ -243,6 +258,8 @@ export async function callMemoryTool( return await handleUserProfile(userId); case "memory_get_space": return await handleGetSpace({ ...args, userId }); + case "get_session_id": + return await handleGetSessionId(); case "get_integrations": return await handleGetIntegrations({ ...args, userId }); case "get_integration_actions": @@ -489,6 +506,35 @@ async function handleGetSpace(args: any) { } } +// Handler for get_session_id +async function handleGetSessionId() { + try { + const sessionId = randomUUID(); + + return { + content: [ + { + type: "text", + text: JSON.stringify({ sessionId }), + }, + ], + isError: false, + }; + } catch (error) { + logger.error(`MCP get session id error: ${error}`); + + return { + content: [ + { + type: "text", + text: `Error generating session ID: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + isError: true, + }; + } +} + // Handler for get_integrations async function handleGetIntegrations(args: any) { try { diff --git a/apps/webapp/package.json b/apps/webapp/package.json index aae6e45..44c31c6 100644 --- a/apps/webapp/package.json +++ b/apps/webapp/package.json @@ -44,6 +44,7 @@ "@radix-ui/react-icons": "^1.3.0", "@radix-ui/react-label": "^2.0.2", "@radix-ui/react-popover": "^1.0.7", + "@radix-ui/react-progress": "^1.1.4", "@radix-ui/react-scroll-area": "^1.0.5", "@radix-ui/react-select": "^2.0.0", "@radix-ui/react-separator": "^1.1.7", @@ -53,7 +54,6 @@ "@radix-ui/react-tabs": "^1.0.4", "@radix-ui/react-toast": "^1.1.5", "@radix-ui/react-tooltip": "^1.2.7", - "@radix-ui/react-progress": "^1.1.4", "@remix-run/express": "2.16.7", "@remix-run/node": "2.1.0", "@remix-run/react": "2.16.7", @@ -80,6 +80,7 @@ "@tiptap/pm": "^2.11.9", "@tiptap/react": "^2.11.9", "@tiptap/starter-kit": "2.11.9", + "@trigger.dev/python": "^4.0.5", "@trigger.dev/react-hooks": "4.0.4", "@trigger.dev/sdk": "4.0.4", "ai": "5.0.78", @@ -125,25 +126,25 @@ "react": "^18.2.0", "react-calendar-heatmap": "^1.10.0", "react-dom": "^18.2.0", + "react-hotkeys-hook": "^4.5.0", "react-markdown": "10.1.0", "react-resizable-panels": "^1.0.9", - "react-hotkeys-hook": "^4.5.0", "react-virtualized": "^9.22.6", - "resumable-stream": "2.2.8", "remix-auth": "^4.2.0", "remix-auth-oauth2": "^3.4.1", "remix-themes": "^2.0.4", "remix-typedjson": "0.3.1", "remix-utils": "^7.7.0", + "resumable-stream": "2.2.8", "sigma": "^3.0.2", - "stripe": "19.0.0", "simple-oauth2": "^5.1.0", + "stripe": "19.0.0", "tailwind-merge": "^2.6.0", - "tiptap-markdown": "0.9.0", "tailwind-scrollbar-hide": "^2.0.0", "tailwindcss-animate": "^1.0.7", "tailwindcss-textshadow": "^2.1.3", "tiny-invariant": "^1.3.1", + "tiptap-markdown": "0.9.0", "zod": "3.25.76", "zod-error": "1.5.0", "zod-validation-error": "^1.5.0" diff --git a/apps/webapp/trigger.config.ts b/apps/webapp/trigger.config.ts index 938f4b5..c42df98 100644 --- a/apps/webapp/trigger.config.ts +++ b/apps/webapp/trigger.config.ts @@ -1,6 +1,7 @@ import { defineConfig } from "@trigger.dev/sdk/v3"; import { syncEnvVars } from "@trigger.dev/build/extensions/core"; import { prismaExtension } from "@trigger.dev/build/extensions/prisma"; +import { pythonExtension } from "@trigger.dev/python/extension"; export default defineConfig({ project: process.env.TRIGGER_PROJECT_ID as string, @@ -38,6 +39,10 @@ export default defineConfig({ 
prismaExtension({ schema: "prisma/schema.prisma", }), + pythonExtension({ + scripts: ["./app/bert/**/*.py"], + requirementsFile: "./app/bert/requirements.txt", + }), ], }, }); diff --git a/docs/self-hosting/docker.mdx b/docs/self-hosting/docker.mdx index 2dde599..42143b5 100644 --- a/docs/self-hosting/docker.mdx +++ b/docs/self-hosting/docker.mdx @@ -14,9 +14,7 @@ description: "Get started with CORE in 5 minutes" ## Requirements -These are the minimum requirements for running the webapp and background job components. They can run on the same, or on separate machines. - -It's fine to run everything on the same machine for testing. To be able to scale your workers, you will want to run them separately. +These are the minimum requirements for running the core. ### Prerequisites @@ -27,7 +25,6 @@ To run CORE, you will need: ### System Requirements -**Webapp & Database Machine:** - 4+ vCPU - 8+ GB RAM - 20+ GB Storage @@ -41,7 +38,7 @@ CORE offers multiple deployment approaches depending on your needs: For a one-click deployment experience, use Railway: -[![Deploy on Railway](https://railway.com/button.svg)](https://railway.com/deploy/6aEd9C?referralCode=LHvbIb&utm_medium=integration&utm_source=template&utm_campaign=generic) +[![Deploy on Railway](https://railway.com/button.svg)](https://railway.com/deploy/core?referralCode=LHvbIb&utm_medium=integration&utm_source=template&utm_campaign=generic) Railway will automatically set up all required services and handle the infrastructure for you. diff --git a/docs/self-hosting/overview.mdx b/docs/self-hosting/overview.mdx index aea684d..5850fca 100644 --- a/docs/self-hosting/overview.mdx +++ b/docs/self-hosting/overview.mdx @@ -16,7 +16,7 @@ We provide version-tagged releases for self-hosted deployments. It's highly advi For a quick one-click deployment, you can use Railway: -[![Deploy on Railway](https://railway.com/button.svg)](https://railway.com/deploy/6aEd9C?referralCode=LHvbIb&utm_medium=integration&utm_source=template&utm_campaign=generic) +[![Deploy on Railway](https://railway.com/button.svg)](https://railway.com/deploy/core?referralCode=LHvbIb&utm_medium=integration&utm_source=template&utm_campaign=generic) Alternatively, you can follow our [Docker deployment guide](/self-hosting/docker) for manual setup. 
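The `pythonExtension` entry added to `trigger.config.ts` above only bundles the Python sources and their requirements into the build; a task still has to invoke them at runtime. A minimal sketch of what such a call site could look like with `@trigger.dev/python` (the task id, script path, and payload shape are illustrative assumptions, not code from this patch):

```ts
import { task } from "@trigger.dev/sdk/v3";
// `python.runScript` executes a file registered via pythonExtension({ scripts: [...] })
import { python } from "@trigger.dev/python";

export const bertTopicExample = task({
  id: "bert-topic-example", // hypothetical id, for illustration only
  run: async (payload: { text: string }) => {
    // Pass the input as argv; the script is assumed to print a JSON result to stdout.
    const result = await python.runScript("./app/bert/topic_analysis.py", [
      payload.text,
    ]);
    return JSON.parse(result.stdout);
  },
});
```
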
diff --git a/hosting/docker/docker-compose.yaml b/hosting/docker/docker-compose.yaml index 4487903..1184e87 100644 --- a/hosting/docker/docker-compose.yaml +++ b/hosting/docker/docker-compose.yaml @@ -8,55 +8,55 @@ x-logging: &logging-config version: "3.8" services: - core: - container_name: core-app - image: redplanethq/core:${VERSION} - environment: - - NODE_ENV=${NODE_ENV} - - DATABASE_URL=${DATABASE_URL} - - DIRECT_URL=${DIRECT_URL} - - SESSION_SECRET=${SESSION_SECRET} - - ENCRYPTION_KEY=${ENCRYPTION_KEY} - - MAGIC_LINK_SECRET=${MAGIC_LINK_SECRET} - - LOGIN_ORIGIN=${CORE_LOGIN_ORIGIN} - - APP_ORIGIN=${CORE_APP_ORIGIN} - - REDIS_HOST=${REDIS_HOST} - - REDIS_PORT=${REDIS_PORT} - - REDIS_PASSWORD=${REDIS_PASSWORD} - - REDIS_TLS_DISABLED=${REDIS_TLS_DISABLED} - - NEO4J_URI=${NEO4J_URI} - - NEO4J_USERNAME=${NEO4J_USERNAME} - - NEO4J_PASSWORD=${NEO4J_PASSWORD} - - OPENAI_API_KEY=${OPENAI_API_KEY} - - AUTH_GOOGLE_CLIENT_ID=${AUTH_GOOGLE_CLIENT_ID} - - AUTH_GOOGLE_CLIENT_SECRET=${AUTH_GOOGLE_CLIENT_SECRET} - - ENABLE_EMAIL_LOGIN=${ENABLE_EMAIL_LOGIN} - - OLLAMA_URL=${OLLAMA_URL} - - EMBEDDING_MODEL=${EMBEDDING_MODEL} - - MODEL=${MODEL} - - TRIGGER_PROJECT_ID=${TRIGGER_PROJECT_ID} - - TRIGGER_SECRET_KEY=${TRIGGER_SECRET_KEY} - - TRIGGER_API_URL=${API_ORIGIN} - - POSTGRES_DB=${POSTGRES_DB} - - EMAIL_TRANSPORT=${EMAIL_TRANSPORT} - - REPLY_TO_EMAIL=${REPLY_TO_EMAIL} - - FROM_EMAIL=${FROM_EMAIL} - - RESEND_API_KEY=${RESEND_API_KEY} - - COHERE_API_KEY=${COHERE_API_KEY} - - QUEUE_PROVIDER=${QUEUE_PROVIDER} - - TELEMETRY_ENABLED=${TELEMETRY_ENABLED} - - TELEMETRY_ANONYMOUS=${TELEMETRY_ANONYMOUS} - ports: - - "3033:3000" - depends_on: - postgres: - condition: service_healthy - redis: - condition: service_started - neo4j: - condition: service_healthy - networks: - - core + # core: + # container_name: core-app + # image: redplanethq/core:${VERSION} + # environment: + # - NODE_ENV=${NODE_ENV} + # - DATABASE_URL=${DATABASE_URL} + # - DIRECT_URL=${DIRECT_URL} + # - SESSION_SECRET=${SESSION_SECRET} + # - ENCRYPTION_KEY=${ENCRYPTION_KEY} + # - MAGIC_LINK_SECRET=${MAGIC_LINK_SECRET} + # - LOGIN_ORIGIN=${CORE_LOGIN_ORIGIN} + # - APP_ORIGIN=${CORE_APP_ORIGIN} + # - REDIS_HOST=${REDIS_HOST} + # - REDIS_PORT=${REDIS_PORT} + # - REDIS_PASSWORD=${REDIS_PASSWORD} + # - REDIS_TLS_DISABLED=${REDIS_TLS_DISABLED} + # - NEO4J_URI=${NEO4J_URI} + # - NEO4J_USERNAME=${NEO4J_USERNAME} + # - NEO4J_PASSWORD=${NEO4J_PASSWORD} + # - OPENAI_API_KEY=${OPENAI_API_KEY} + # - AUTH_GOOGLE_CLIENT_ID=${AUTH_GOOGLE_CLIENT_ID} + # - AUTH_GOOGLE_CLIENT_SECRET=${AUTH_GOOGLE_CLIENT_SECRET} + # - ENABLE_EMAIL_LOGIN=${ENABLE_EMAIL_LOGIN} + # - OLLAMA_URL=${OLLAMA_URL} + # - EMBEDDING_MODEL=${EMBEDDING_MODEL} + # - MODEL=${MODEL} + # - TRIGGER_PROJECT_ID=${TRIGGER_PROJECT_ID} + # - TRIGGER_SECRET_KEY=${TRIGGER_SECRET_KEY} + # - TRIGGER_API_URL=${API_ORIGIN} + # - POSTGRES_DB=${POSTGRES_DB} + # - EMAIL_TRANSPORT=${EMAIL_TRANSPORT} + # - REPLY_TO_EMAIL=${REPLY_TO_EMAIL} + # - FROM_EMAIL=${FROM_EMAIL} + # - RESEND_API_KEY=${RESEND_API_KEY} + # - COHERE_API_KEY=${COHERE_API_KEY} + # - QUEUE_PROVIDER=${QUEUE_PROVIDER} + # - TELEMETRY_ENABLED=${TELEMETRY_ENABLED} + # - TELEMETRY_ANONYMOUS=${TELEMETRY_ANONYMOUS} + # ports: + # - "3033:3000" + # depends_on: + # postgres: + # condition: service_healthy + # redis: + # condition: service_started + # neo4j: + # condition: service_healthy + # networks: + # - core postgres: container_name: core-postgres diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1a78a8f..ebc6406 100644 --- a/pnpm-lock.yaml +++ 
b/pnpm-lock.yaml @@ -478,6 +478,9 @@ importers: '@tiptap/starter-kit': specifier: 2.11.9 version: 2.11.9 + '@trigger.dev/python': + specifier: ^4.0.5 + version: 4.0.5(@trigger.dev/build@4.0.4(typescript@5.8.3))(@trigger.dev/sdk@4.0.4(ai@5.0.78(zod@3.25.76))(zod@3.25.76)) '@trigger.dev/react-hooks': specifier: 4.0.4 version: 4.0.4(react-dom@18.3.1(react@18.3.1))(react@18.3.1) @@ -1810,6 +1813,9 @@ packages: '@electric-sql/client@1.0.0-beta.1': resolution: {integrity: sha512-Ei9jN3pDoGzc+a/bGqnB5ajb52IvSv7/n2btuyzUlcOHIR2kM9fqtYTJXPwZYKLkGZlHWlpHgWyRtrinkP2nHg==} + '@electric-sql/client@1.0.14': + resolution: {integrity: sha512-LtPAfeMxXRiYS0hyDQ5hue2PjljUiK9stvzsVyVb4nwxWQxfOWTSF42bHTs/o5i3x1T4kAQ7mwHpxa4A+f8X7Q==} + '@emnapi/core@1.4.3': resolution: {integrity: sha512-4m62DuCE07lw01soJwPiBGC0nAww0Q+RY70VZ+n49yDIO13yyinhbWCeNnaob0lakDtWQzSdtNWzJeOJt2ma+g==} @@ -2698,6 +2704,9 @@ packages: '@mdx-js/mdx@2.3.0': resolution: {integrity: sha512-jLuwRlz8DQfQNiUCJR50Y09CGPq3fLtmtUQfVrj79E0JWu3dvsVcxVIcfhR5h0iXu+/z++zDrYeiJqifRynJkA==} + '@microsoft/fetch-event-source@2.0.1': + resolution: {integrity: sha512-W6CLUJ2eBMw3Rec70qrsEW0jOm/3twwJv21mrmj2yORiaVmVYGS4sSS5yUwvQc1ZlDLYGPnClVWmUUMagKNsfA==} + '@mjackson/headers@0.10.0': resolution: {integrity: sha512-U1Eu1gF979k7ZoIBsJyD+T5l9MjtPONsZfoXfktsQHPJD0s7SokBGx+tLKDLsOY+gzVYAWS0yRFDNY8cgbQzWQ==} @@ -5556,6 +5565,17 @@ packages: resolution: {integrity: sha512-c5myttkNhqaqvLlEz3ttE1qEsULlD6ILBge5FAfEtMv9HVS/pNlgvMKrdFMefaGO/bE4HoxrNGdJsY683Kq32w==} engines: {node: '>=18.20.0'} + '@trigger.dev/core@4.0.5': + resolution: {integrity: sha512-SHelq2kBGdsOHVJzdbp+Twm9kXqmPBMtwVoosAuINC8mYpJzighwUD0e65uwYqgg0DqLldD408ZwfhTEQyDq0A==} + engines: {node: '>=18.20.0'} + + '@trigger.dev/python@4.0.5': + resolution: {integrity: sha512-RowGxUN33WPR42iCJCpmviZiFN/eS1dF1ZC8g1a2fpW8L8nruK5RGiIzSg/ohVmVngarVWp6Fe9H/6YLlRHsBw==} + engines: {node: '>=18.20.0'} + peerDependencies: + '@trigger.dev/build': ^4.0.5 + '@trigger.dev/sdk': ^4.0.5 + '@trigger.dev/react-hooks@4.0.4': resolution: {integrity: sha512-tgyaGKwFTbVaD4QZdR5GBc2R7T/yq+vHpWw506ys75Mo9uEZN0rGmw7g5q1Pe4XJvsdDiVjcxcJ4tK8zwUM5Zg==} engines: {node: '>=18.20.0'} @@ -13923,7 +13943,7 @@ snapshots: '@babel/traverse': 7.27.4 '@babel/types': 7.27.6 convert-source-map: 2.0.0 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 gensync: 1.0.0-beta.2 json5: 2.2.3 semver: 6.3.1 @@ -13943,7 +13963,7 @@ snapshots: '@babel/traverse': 7.27.4 '@babel/types': 7.27.6 convert-source-map: 2.0.0 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 gensync: 1.0.0-beta.2 json5: 2.2.3 semver: 6.3.1 @@ -14219,7 +14239,7 @@ snapshots: '@babel/parser': 7.27.5 '@babel/template': 7.27.2 '@babel/types': 7.27.6 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 globals: 11.12.0 transitivePeerDependencies: - supports-color @@ -14495,6 +14515,12 @@ snapshots: optionalDependencies: '@rollup/rollup-darwin-arm64': 4.43.0 + '@electric-sql/client@1.0.14': + dependencies: + '@microsoft/fetch-event-source': 2.0.1 + optionalDependencies: + '@rollup/rollup-darwin-arm64': 4.43.0 + '@emnapi/core@1.4.3': dependencies: '@emnapi/wasi-threads': 1.0.2 @@ -14894,7 +14920,7 @@ snapshots: '@eslint/eslintrc@2.1.4': dependencies: ajv: 6.12.6 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 espree: 9.6.1 globals: 13.24.0 ignore: 5.3.2 @@ -14985,7 +15011,7 @@ snapshots: '@humanwhocodes/config-array@0.13.0': dependencies: '@humanwhocodes/object-schema': 2.0.3 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 minimatch: 3.1.2 transitivePeerDependencies: - 
supports-color @@ -15109,6 +15135,8 @@ snapshots: transitivePeerDependencies: - supports-color + '@microsoft/fetch-event-source@2.0.1': {} + '@mjackson/headers@0.10.0': {} '@mjackson/headers@0.11.1': {} @@ -15352,7 +15380,7 @@ snapshots: '@opentelemetry/api': 1.9.0 '@opentelemetry/api-logs': 0.203.0 import-in-the-middle: 1.11.0 - require-in-the-middle: 7.5.2(supports-color@10.0.0) + require-in-the-middle: 7.5.2 transitivePeerDependencies: - supports-color @@ -18461,6 +18489,56 @@ snapshots: - supports-color - utf-8-validate + '@trigger.dev/core@4.0.5': + dependencies: + '@bugsnag/cuid': 3.2.1 + '@electric-sql/client': 1.0.14 + '@google-cloud/precise-date': 4.0.0 + '@jsonhero/path': 1.0.21 + '@opentelemetry/api': 1.9.0 + '@opentelemetry/api-logs': 0.203.0 + '@opentelemetry/core': 2.0.1(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-logs-otlp-http': 0.203.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-trace-otlp-http': 0.203.0(@opentelemetry/api@1.9.0) + '@opentelemetry/instrumentation': 0.203.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resources': 2.0.1(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-logs': 0.203.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 2.0.1(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-node': 2.0.1(@opentelemetry/api@1.9.0) + '@opentelemetry/semantic-conventions': 1.36.0 + dequal: 2.0.3 + eventsource: 3.0.7 + eventsource-parser: 3.0.6 + execa: 8.0.1 + humanize-duration: 3.33.0 + jose: 5.10.0 + nanoid: 3.3.8 + prom-client: 15.1.3 + socket.io: 4.7.4 + socket.io-client: 4.7.5 + std-env: 3.9.0 + superjson: 2.2.2 + tinyexec: 0.3.2 + uncrypto: 0.1.3 + zod: 3.25.76 + zod-error: 1.5.0 + zod-validation-error: 1.5.0(zod@3.25.76) + transitivePeerDependencies: + - bufferutil + - supports-color + - utf-8-validate + + '@trigger.dev/python@4.0.5(@trigger.dev/build@4.0.4(typescript@5.8.3))(@trigger.dev/sdk@4.0.4(ai@5.0.78(zod@3.25.76))(zod@3.25.76))': + dependencies: + '@trigger.dev/build': 4.0.4(typescript@5.8.3) + '@trigger.dev/core': 4.0.5 + '@trigger.dev/sdk': 4.0.4(ai@5.0.78(zod@3.25.76))(zod@3.25.76) + tinyexec: 0.3.2 + transitivePeerDependencies: + - bufferutil + - supports-color + - utf-8-validate + '@trigger.dev/react-hooks@4.0.4(react-dom@18.3.1(react@18.3.1))(react@18.3.1)': dependencies: '@trigger.dev/core': 4.0.4 @@ -18883,7 +18961,7 @@ snapshots: '@typescript-eslint/scope-manager': 5.62.0 '@typescript-eslint/type-utils': 5.62.0(eslint@8.57.1)(typescript@5.8.3) '@typescript-eslint/utils': 5.62.0(eslint@8.57.1)(typescript@5.8.3) - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 eslint: 8.57.1 graphemer: 1.4.0 ignore: 5.3.2 @@ -18903,7 +18981,7 @@ snapshots: '@typescript-eslint/type-utils': 6.21.0(eslint@8.57.1)(typescript@5.8.3) '@typescript-eslint/utils': 6.21.0(eslint@8.57.1)(typescript@5.8.3) '@typescript-eslint/visitor-keys': 6.21.0 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 eslint: 8.57.1 graphemer: 1.4.0 ignore: 5.3.2 @@ -18920,7 +18998,7 @@ snapshots: '@typescript-eslint/scope-manager': 5.62.0 '@typescript-eslint/types': 5.62.0 '@typescript-eslint/typescript-estree': 5.62.0(typescript@5.8.3) - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 eslint: 8.57.1 optionalDependencies: typescript: 5.8.3 @@ -18933,7 +19011,7 @@ snapshots: '@typescript-eslint/types': 6.21.0 '@typescript-eslint/typescript-estree': 6.21.0(typescript@5.8.3) '@typescript-eslint/visitor-keys': 6.21.0 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 eslint: 8.57.1 optionalDependencies: typescript: 5.8.3 @@ -18954,7 +19032,7 @@ snapshots: 
dependencies: '@typescript-eslint/typescript-estree': 5.62.0(typescript@5.8.3) '@typescript-eslint/utils': 5.62.0(eslint@8.57.1)(typescript@5.8.3) - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 eslint: 8.57.1 tsutils: 3.21.0(typescript@5.8.3) optionalDependencies: @@ -18966,7 +19044,7 @@ snapshots: dependencies: '@typescript-eslint/typescript-estree': 6.21.0(typescript@5.8.3) '@typescript-eslint/utils': 6.21.0(eslint@8.57.1)(typescript@5.8.3) - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 eslint: 8.57.1 ts-api-utils: 1.4.3(typescript@5.8.3) optionalDependencies: @@ -18982,7 +19060,7 @@ snapshots: dependencies: '@typescript-eslint/types': 5.62.0 '@typescript-eslint/visitor-keys': 5.62.0 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 globby: 11.1.0 is-glob: 4.0.3 semver: 7.7.2 @@ -18996,7 +19074,7 @@ snapshots: dependencies: '@typescript-eslint/types': 6.21.0 '@typescript-eslint/visitor-keys': 6.21.0 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 globby: 11.1.0 is-glob: 4.0.3 minimatch: 9.0.3 @@ -19556,7 +19634,7 @@ snapshots: dependencies: bytes: 3.1.2 content-type: 1.0.5 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 http-errors: 2.0.0 iconv-lite: 0.6.3 on-finished: 2.4.1 @@ -20329,6 +20407,10 @@ snapshots: dependencies: ms: 2.1.3 + debug@4.4.1: + dependencies: + ms: 2.1.3 + debug@4.4.1(supports-color@10.0.0): dependencies: ms: 2.1.3 @@ -20908,7 +20990,7 @@ snapshots: eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.31.0)(eslint@8.57.1): dependencies: '@nolyfill/is-core-module': 1.0.39 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 eslint: 8.57.1 get-tsconfig: 4.10.1 is-bun-module: 2.0.0 @@ -21130,7 +21212,7 @@ snapshots: ajv: 6.12.6 chalk: 4.1.2 cross-spawn: 7.0.6 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 doctrine: 3.0.0 escape-string-regexp: 4.0.0 eslint-scope: 7.2.2 @@ -21339,7 +21421,7 @@ snapshots: content-type: 1.0.5 cookie: 0.7.2 cookie-signature: 1.2.2 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 encodeurl: 2.0.0 escape-html: 1.0.3 etag: 1.8.1 @@ -21443,7 +21525,7 @@ snapshots: finalhandler@2.1.0: dependencies: - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 encodeurl: 2.0.0 escape-html: 1.0.3 on-finished: 2.4.1 @@ -21979,7 +22061,7 @@ snapshots: dependencies: '@ioredis/commands': 1.2.0 cluster-key-slot: 1.1.2 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 denque: 2.1.0 lodash.defaults: 4.2.0 lodash.isarguments: 3.1.0 @@ -23143,7 +23225,7 @@ snapshots: micromark@3.2.0: dependencies: '@types/debug': 4.1.12 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 decode-named-character-reference: 1.1.0 micromark-core-commonmark: 1.1.0 micromark-factory-space: 1.1.0 @@ -23165,7 +23247,7 @@ snapshots: micromark@4.0.2: dependencies: '@types/debug': 4.1.12 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 decode-named-character-reference: 1.1.0 devlop: 1.1.0 micromark-core-commonmark: 2.0.3 @@ -24870,6 +24952,14 @@ snapshots: require-from-string@2.0.2: {} + require-in-the-middle@7.5.2: + dependencies: + debug: 4.4.1 + module-details-from-path: 1.0.4 + resolve: 1.22.10 + transitivePeerDependencies: + - supports-color + require-in-the-middle@7.5.2(supports-color@10.0.0): dependencies: debug: 4.4.1(supports-color@10.0.0) @@ -24971,7 +25061,7 @@ snapshots: router@2.2.0: dependencies: - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 depd: 2.0.0 is-promise: 4.0.0 parseurl: 1.3.3 @@ -25090,7 +25180,7 @@ snapshots: send@1.2.0: dependencies: - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 encodeurl: 2.0.0 
escape-html: 1.0.3 etag: 1.8.1 @@ -25213,7 +25303,7 @@ snapshots: dependencies: '@hapi/hoek': 11.0.7 '@hapi/wreck': 18.1.0 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 joi: 17.13.3 transitivePeerDependencies: - supports-color @@ -25859,7 +25949,7 @@ snapshots: cac: 6.7.14 chokidar: 4.0.3 consola: 3.4.2 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 esbuild: 0.25.5 fix-dts-default-cjs-exports: 1.0.1 joycon: 3.1.1 @@ -26268,7 +26358,7 @@ snapshots: vite-node@1.6.1(@types/node@20.19.7)(less@4.4.0)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.42.0): dependencies: cac: 6.7.14 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 pathe: 1.1.2 picocolors: 1.1.1 vite: 5.4.19(@types/node@20.19.7)(less@4.4.0)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.42.0) @@ -26286,7 +26376,7 @@ snapshots: vite-node@3.2.3(@types/node@20.19.7)(jiti@2.4.2)(less@4.4.0)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.42.0)(tsx@4.20.4)(yaml@2.8.0): dependencies: cac: 6.7.14 - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 es-module-lexer: 1.7.0 pathe: 2.0.3 vite: 6.3.5(@types/node@20.19.7)(jiti@2.4.2)(less@4.4.0)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.42.0)(tsx@4.20.4)(yaml@2.8.0) @@ -26306,7 +26396,7 @@ snapshots: vite-tsconfig-paths@4.3.2(typescript@5.8.3)(vite@6.3.5(@types/node@20.19.7)(jiti@2.4.2)(less@4.4.0)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.42.0)(tsx@4.20.4)(yaml@2.8.0)): dependencies: - debug: 4.4.1(supports-color@10.0.0) + debug: 4.4.1 globrex: 0.1.2 tsconfck: 3.1.6(typescript@5.8.3) optionalDependencies: