feat: space v3

This commit is contained in:
Harshith Mullapudi 2025-10-29 16:16:27 +05:30
parent c5407be54d
commit 6d850f3d89
33 changed files with 1103 additions and 1425 deletions


@ -41,10 +41,7 @@ NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=27192e6432564f4788d55c15131bd5ac
OPENAI_API_KEY=
MAGIC_LINK_SECRET=27192e6432564f4788d55c15131bd5ac
NEO4J_AUTH=neo4j/27192e6432564f4788d55c15131bd5ac
OLLAMA_URL=http://ollama:11434


@ -7,32 +7,6 @@ on:
  workflow_dispatch:
jobs:
  build-init:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
        with:
          ref: main
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1
      - name: Login to Docker Registry
        run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin
      - name: Build and Push Frontend Docker Image
        uses: docker/build-push-action@v2
        with:
          context: .
          file: ./apps/init/Dockerfile
          platforms: linux/amd64,linux/arm64
          push: true
          tags: redplanethq/init:${{ github.ref_name }}
  build-webapp:
    runs-on: ubuntu-latest

apps/init/.gitignore (vendored, 51 lines)

@ -1,51 +0,0 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# Dependencies
node_modules
.pnp
.pnp.js
# Local env files
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
# Testing
coverage
# Turbo
.turbo
# Vercel
.vercel
# Build Outputs
.next/
out/
build
dist
.tshy/
.tshy-build/
# Debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# Misc
.DS_Store
*.pem
docker-compose.dev.yaml
clickhouse/
.vscode/
registry/
.cursor
CLAUDE.md
.claude


@ -1,70 +0,0 @@
ARG NODE_IMAGE=node:20.11.1-bullseye-slim@sha256:5a5a92b3a8d392691c983719dbdc65d9f30085d6dcd65376e7a32e6fe9bf4cbe
FROM ${NODE_IMAGE} AS pruner
WORKDIR /core
COPY --chown=node:node . .
RUN npx -q turbo@2.5.3 prune --scope=@redplanethq/init --docker
RUN find . -name "node_modules" -type d -prune -exec rm -rf '{}' +
# Base strategy to have layer caching
FROM ${NODE_IMAGE} AS base
RUN apt-get update && apt-get install -y openssl dumb-init postgresql-client
WORKDIR /core
COPY --chown=node:node .gitignore .gitignore
COPY --from=pruner --chown=node:node /core/out/json/ .
COPY --from=pruner --chown=node:node /core/out/pnpm-lock.yaml ./pnpm-lock.yaml
COPY --from=pruner --chown=node:node /core/out/pnpm-workspace.yaml ./pnpm-workspace.yaml
## Dev deps
FROM base AS dev-deps
WORKDIR /core
# Corepack is used to install pnpm
RUN corepack enable
ENV NODE_ENV development
RUN pnpm install --ignore-scripts --no-frozen-lockfile
## Production deps
FROM base AS production-deps
WORKDIR /core
# Corepack is used to install pnpm
RUN corepack enable
ENV NODE_ENV production
RUN pnpm install --prod --no-frozen-lockfile
## Builder (builds the init CLI)
FROM base AS builder
WORKDIR /core
# Corepack is used to install pnpm
RUN corepack enable
COPY --from=pruner --chown=node:node /core/out/full/ .
COPY --from=dev-deps --chown=node:node /core/ .
COPY --chown=node:node turbo.json turbo.json
COPY --chown=node:node .configs/tsconfig.base.json .configs/tsconfig.base.json
RUN pnpm run build --filter=@redplanethq/init...
# Runner
FROM ${NODE_IMAGE} AS runner
RUN apt-get update && apt-get install -y openssl postgresql-client ca-certificates
WORKDIR /core
RUN corepack enable
ENV NODE_ENV production
COPY --from=base /usr/bin/dumb-init /usr/bin/dumb-init
COPY --from=pruner --chown=node:node /core/out/full/ .
COPY --from=production-deps --chown=node:node /core .
COPY --from=builder --chown=node:node /core/apps/init/dist ./apps/init/dist
# Copy the trigger dump file
COPY --chown=node:node apps/init/trigger.dump ./apps/init/trigger.dump
# Copy and set up entrypoint script
COPY --chown=node:node apps/init/entrypoint.sh ./apps/init/entrypoint.sh
RUN chmod +x ./apps/init/entrypoint.sh
USER node
WORKDIR /core/apps/init
ENTRYPOINT ["dumb-init", "--"]
CMD ["./entrypoint.sh"]


@ -1,197 +0,0 @@
# Core CLI
🧠 **CORE - Contextual Observation & Recall Engine**
A Command-Line Interface for setting up and managing the Core development environment.
## Installation
```bash
npm install -g @redplanethq/core
```
## Commands
### `core init`
**One-time setup command** - Initializes the Core development environment with full configuration.
### `core start`
**Daily usage command** - Starts all Core services (Docker containers).
### `core stop`
**Daily usage command** - Stops all Core services (Docker containers).
## Getting Started
### Prerequisites
- **Node.js** (v18.20.0 or higher)
- **Docker** and **Docker Compose**
- **Git**
- **pnpm** package manager
### Initial Setup
1. **Clone the Core repository:**
```bash
git clone https://github.com/redplanethq/core.git
cd core
```
2. **Run the initialization command:**
```bash
core init
```
3. **The CLI will guide you through the complete setup process:**
#### Step 1: Prerequisites Check
- The CLI shows a checklist of required tools
- Confirms you're in the Core repository directory
- Exits with instructions if prerequisites aren't met
#### Step 2: Environment Configuration
- Copies `.env.example` to `.env` in the root directory
- Copies `trigger/.env.example` to `trigger/.env`
- Skips copying if `.env` files already exist
#### Step 3: Docker Services Startup
- Starts main Core services: `docker compose up -d`
- Starts Trigger.dev services: `docker compose up -d` (in trigger/ directory)
- Shows real-time output with progress indicators
#### Step 4: Database Health Check
- Verifies PostgreSQL is running on `localhost:5432`
- Retries for up to 60 seconds if needed
#### Step 5: Trigger.dev Setup (Interactive)
- **If Trigger.dev is not configured:**
1. Prompts you to open http://localhost:8030
2. Asks you to login to Trigger.dev
3. Guides you to create an organization and project
4. Collects your Project ID and Secret Key
5. Updates `.env` with your Trigger.dev configuration
6. Restarts Core services with new configuration
- **If Trigger.dev is already configured:**
- Skips setup and shows "Configuration already exists" message
#### Step 6: Docker Registry Login
- Displays docker login command with credentials from `.env`
- Waits for you to complete the login process
#### Step 7: Trigger.dev Task Deployment
- Automatically runs: `npx trigger.dev@v4-beta login -a http://localhost:8030`
- Deploys tasks with: `pnpm trigger:deploy`
- Shows manual deployment instructions if automatic deployment fails
#### Step 8: Setup Complete!
- Confirms all services are running
- Shows service URLs and connection information
## Daily Usage
After initial setup, use these commands for daily development:
### Start Services
```bash
core start
```
Starts all Docker containers for Core development.
### Stop Services
```bash
core stop
```
Stops all Docker containers.
## Service URLs
After setup, these services will be available:
- **Core Application**: http://localhost:3033
- **Trigger.dev**: http://localhost:8030
- **PostgreSQL**: localhost:5432
## Troubleshooting
### Repository Not Found
If you run commands outside the Core repository:
- The CLI will ask you to confirm you're in the Core repository
- If not, it provides instructions to clone the repository
- Navigate to the Core repository directory before running commands again
### Docker Issues
- Ensure Docker is running
- Check Docker Compose is installed
- Verify you have sufficient system resources
### Trigger.dev Setup Issues
- Check container logs: `docker logs trigger-webapp --tail 50`
- Ensure you can access http://localhost:8030
- Verify your network allows connections to localhost
### Environment Variables
The CLI automatically manages these environment variables:
- `TRIGGER_PROJECT_ID` - Your Trigger.dev project ID
- `TRIGGER_SECRET_KEY` - Your Trigger.dev secret key
- Docker registry credentials for deployment
### Manual Trigger.dev Deployment
If automatic deployment fails, run manually:
```bash
npx trigger.dev@v4-beta login -a http://localhost:8030
pnpm trigger:deploy
```
## Development Workflow
1. **First time setup:** `core init`
2. **Daily development:**
- `core start` - Start your development environment
- Do your development work
- `core stop` - Stop services when done
## Support
For issues and questions:
- Check the main Core repository: https://github.com/redplanethq/core
- Review Docker container logs for troubleshooting
- Ensure all prerequisites are properly installed
## Features
- 🚀 **One-command setup** - Complete environment initialization
- 🔄 **Smart configuration** - Skips already configured components
- 📱 **Real-time feedback** - Live progress indicators and output
- 🐳 **Docker integration** - Full container lifecycle management
- 🔧 **Interactive setup** - Guided configuration process
- 🎯 **Error handling** - Graceful failure with recovery instructions
---
**Happy coding with Core!** 🎉


@ -1,22 +0,0 @@
#!/bin/sh
# Exit on any error
set -e
echo "Starting init CLI..."
# Wait for database to be ready
echo "Waiting for database connection..."
until pg_isready -h "${DB_HOST:-localhost}" -p "${DB_PORT:-5432}" -U "${POSTGRES_USER:-docker}"; do
echo "Database is unavailable - sleeping"
sleep 2
done
echo "Database is ready!"
# Run the init command
echo "Running init command..."
node ./dist/esm/index.js init
echo "Init completed successfully!"
exit 0


@ -1,145 +0,0 @@
{
"name": "@redplanethq/init",
"version": "0.1.0",
"description": "A init service to create trigger instance",
"type": "module",
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/redplanethq/core",
"directory": "apps/init"
},
"publishConfig": {
"access": "public"
},
"keywords": [
"typescript"
],
"files": [
"dist",
"trigger.dump"
],
"bin": {
"core": "./dist/esm/index.js"
},
"tshy": {
"selfLink": false,
"main": false,
"module": false,
"dialects": [
"esm"
],
"project": "./tsconfig.json",
"exclude": [
"**/*.test.ts"
],
"exports": {
"./package.json": "./package.json",
".": "./src/index.ts"
}
},
"devDependencies": {
"@epic-web/test-server": "^0.1.0",
"@types/gradient-string": "^1.1.2",
"@types/ini": "^4.1.1",
"@types/object-hash": "3.0.6",
"@types/polka": "^0.5.7",
"@types/react": "^18.2.48",
"@types/resolve": "^1.20.6",
"@types/rimraf": "^4.0.5",
"@types/semver": "^7.5.0",
"@types/source-map-support": "0.5.10",
"@types/ws": "^8.5.3",
"cpy-cli": "^5.0.0",
"execa": "^8.0.1",
"find-up": "^7.0.0",
"rimraf": "^5.0.7",
"ts-essentials": "10.0.1",
"tshy": "^3.0.2",
"tsx": "4.17.0"
},
"scripts": {
"clean": "rimraf dist .tshy .tshy-build .turbo",
"typecheck": "tsc -p tsconfig.src.json --noEmit",
"build": "tshy",
"test": "vitest",
"test:e2e": "vitest --run -c ./e2e/vitest.config.ts"
},
"dependencies": {
"@clack/prompts": "^0.10.0",
"@depot/cli": "0.0.1-cli.2.80.0",
"@opentelemetry/api": "1.9.0",
"@opentelemetry/api-logs": "0.52.1",
"@opentelemetry/exporter-logs-otlp-http": "0.52.1",
"@opentelemetry/exporter-trace-otlp-http": "0.52.1",
"@opentelemetry/instrumentation": "0.52.1",
"@opentelemetry/instrumentation-fetch": "0.52.1",
"@opentelemetry/resources": "1.25.1",
"@opentelemetry/sdk-logs": "0.52.1",
"@opentelemetry/sdk-node": "0.52.1",
"@opentelemetry/sdk-trace-base": "1.25.1",
"@opentelemetry/sdk-trace-node": "1.25.1",
"@opentelemetry/semantic-conventions": "1.25.1",
"ansi-escapes": "^7.0.0",
"braces": "^3.0.3",
"c12": "^1.11.1",
"chalk": "^5.2.0",
"chokidar": "^3.6.0",
"cli-table3": "^0.6.3",
"commander": "^9.4.1",
"defu": "^6.1.4",
"dotenv": "^16.4.5",
"dotenv-expand": "^12.0.2",
"esbuild": "^0.23.0",
"eventsource": "^3.0.2",
"evt": "^2.4.13",
"fast-npm-meta": "^0.2.2",
"git-last-commit": "^1.0.1",
"gradient-string": "^2.0.2",
"has-flag": "^5.0.1",
"import-in-the-middle": "1.11.0",
"import-meta-resolve": "^4.1.0",
"ini": "^5.0.0",
"jsonc-parser": "3.2.1",
"magicast": "^0.3.4",
"minimatch": "^10.0.1",
"mlly": "^1.7.1",
"nypm": "^0.5.4",
"nanoid": "3.3.8",
"object-hash": "^3.0.0",
"open": "^10.0.3",
"knex": "3.1.0",
"p-limit": "^6.2.0",
"p-retry": "^6.1.0",
"partysocket": "^1.0.2",
"pkg-types": "^1.1.3",
"polka": "^0.5.2",
"pg": "8.16.3",
"resolve": "^1.22.8",
"semver": "^7.5.0",
"signal-exit": "^4.1.0",
"source-map-support": "0.5.21",
"std-env": "^3.7.0",
"supports-color": "^10.0.0",
"tiny-invariant": "^1.2.0",
"tinyexec": "^0.3.1",
"tinyglobby": "^0.2.10",
"uuid": "11.1.0",
"ws": "^8.18.0",
"xdg-app-paths": "^8.3.0",
"zod": "3.23.8",
"zod-validation-error": "^1.5.0"
},
"engines": {
"node": ">=18.20.0"
},
"exports": {
"./package.json": "./package.json",
".": {
"import": {
"types": "./dist/esm/index.d.ts",
"default": "./dist/esm/index.js"
}
}
}
}


@ -1,14 +0,0 @@
import { Command } from "commander";
import { initCommand } from "../commands/init.js";
import { VERSION } from "./version.js";
const program = new Command();
program.name("core").description("Core CLI - A Command-Line Interface for Core").version(VERSION);
program
.command("init")
.description("Initialize Core development environment (run once)")
.action(initCommand);
program.parse(process.argv);


@ -1,3 +0,0 @@
import { env } from "../utils/env.js";
export const VERSION = env.VERSION;


@ -1,36 +0,0 @@
import { intro, outro, note } from "@clack/prompts";
import { printCoreBrainLogo } from "../utils/ascii.js";
import { initTriggerDatabase, updateWorkerImage } from "../utils/trigger.js";
export async function initCommand() {
// Display the CORE brain logo
printCoreBrainLogo();
intro("🚀 Core Development Environment Setup");
try {
await initTriggerDatabase();
await updateWorkerImage();
note(
[
"Your services will start running:",
"",
"• Core Application: http://localhost:3033",
"• Trigger.dev: http://localhost:8030",
"• PostgreSQL: localhost:5432",
"",
"You can now start developing with Core!",
"",
" When logging in to the Core Application, you can find the login URL in the Docker container logs:",
" docker logs core-app --tail 50",
].join("\n"),
"🚀 Services Running"
);
outro("🎉 Setup Complete!");
process.exit(0);
} catch (error: any) {
outro(`❌ Setup failed: ${error.message}`);
process.exit(1);
}
}


@ -1,3 +0,0 @@
#!/usr/bin/env node
import "./cli/index.js";


@ -1,29 +0,0 @@
import chalk from "chalk";
import { VERSION } from "../cli/version.js";
export function printCoreBrainLogo(): void {
const brain = `
o o o
o o---o---o o
o---o o o---o---o
o o---o---o---o o
o---o o o---o---o
o o---o---o o
o o o
`;
console.log(chalk.cyan(brain));
console.log(
chalk.bold.white(
` 🧠 CORE - Contextual Observation & Recall Engine ${VERSION ? chalk.gray(`(${VERSION})`) : ""}\n`
)
);
}


@ -1,24 +0,0 @@
import { z } from "zod";
const EnvironmentSchema = z.object({
// Version
VERSION: z.string().default("0.1.24"),
// Database
DB_HOST: z.string().default("localhost"),
DB_PORT: z.string().default("5432"),
TRIGGER_DB: z.string().default("trigger"),
POSTGRES_USER: z.string().default("docker"),
POSTGRES_PASSWORD: z.string().default("docker"),
// Trigger database
TRIGGER_TASKS_IMAGE: z.string().default("redplanethq/proj_core:latest"),
// Node environment
NODE_ENV: z
.union([z.literal("development"), z.literal("production"), z.literal("test")])
.default("development"),
});
export type Environment = z.infer<typeof EnvironmentSchema>;
export const env = EnvironmentSchema.parse(process.env);


@ -1,182 +0,0 @@
import Knex from "knex";
import path from "path";
import { fileURLToPath } from "url";
import { env } from "./env.js";
import { spinner, note, log } from "@clack/prompts";
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
/**
* Returns a PostgreSQL database URL for the given database name.
* Throws if required environment variables are missing.
*/
export function getDatabaseUrl(dbName: string): string {
const { POSTGRES_USER, POSTGRES_PASSWORD, DB_HOST, DB_PORT } = env;
if (!POSTGRES_USER || !POSTGRES_PASSWORD || !DB_HOST || !DB_PORT || !dbName) {
throw new Error(
"One or more required environment variables are missing: POSTGRES_USER, POSTGRES_PASSWORD, DB_HOST, DB_PORT, dbName"
);
}
return `postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${DB_HOST}:${DB_PORT}/${dbName}`;
}
/**
* Checks if the database specified by TRIGGER_DB exists, and creates it if it does not.
* Returns { exists: boolean, created: boolean } - exists indicates success, created indicates if database was newly created.
*/
export async function ensureDatabaseExists(): Promise<{ exists: boolean; created: boolean }> {
const { TRIGGER_DB } = env;
if (!TRIGGER_DB) {
throw new Error("TRIGGER_DB environment variable is missing");
}
// Build a connection string to the default 'postgres' database
const adminDbUrl = getDatabaseUrl("postgres");
// Create a Knex instance for the admin connection
const adminKnex = Knex({
client: "pg",
connection: adminDbUrl,
});
const s = spinner();
s.start("Checking for Trigger.dev database...");
try {
// Check if the database exists
const result = await adminKnex.select(1).from("pg_database").where("datname", TRIGGER_DB);
if (result.length === 0) {
s.message("Database not found. Creating...");
// Database does not exist, create it
await adminKnex.raw(`CREATE DATABASE "${TRIGGER_DB}"`);
s.stop("Database created.");
return { exists: true, created: true };
} else {
s.stop("Database exists.");
return { exists: true, created: false };
}
} catch (err) {
s.stop("Failed to ensure database exists.");
log.warning("Failed to ensure database exists: " + (err as Error).message);
return { exists: false, created: false };
} finally {
await adminKnex.destroy();
}
}
// Main initialization function
export async function initTriggerDatabase() {
const { TRIGGER_DB } = env;
if (!TRIGGER_DB) {
throw new Error("TRIGGER_DB environment variable is missing");
}
// Ensure the database exists
const { exists, created } = await ensureDatabaseExists();
if (!exists) {
throw new Error("Failed to create or verify database exists");
}
// Only run pg_restore if the database was newly created
if (!created) {
note("Database already exists, skipping restore from trigger.dump");
return;
}
// Run pg_restore with the trigger.dump file
const dumpFilePath = path.join(__dirname, "../../../trigger.dump");
const connectionString = getDatabaseUrl(TRIGGER_DB);
const s = spinner();
s.start("Restoring database from trigger.dump...");
try {
// Use execSync and capture stdout/stderr, send to spinner.log
const { spawn } = await import("child_process");
await new Promise<void>((resolve, reject) => {
const child = spawn(
"pg_restore",
["--verbose", "--no-acl", "--no-owner", "-d", connectionString, dumpFilePath],
{ stdio: ["ignore", "pipe", "pipe"] }
);
child.stdout.on("data", (data) => {
s.message(data.toString());
});
child.stderr.on("data", (data) => {
s.message(data.toString());
});
child.on("close", (code) => {
if (code === 0) {
s.stop("Database restored successfully from trigger.dump");
resolve();
} else {
s.stop("Failed to restore database.");
log.warning(`Failed to restore database: pg_restore exited with code ${code}`);
reject(new Error(`Database restore failed: pg_restore exited with code ${code}`));
}
});
child.on("error", (err) => {
s.stop("Failed to restore database.");
log.warning("Failed to restore database: " + err.message);
reject(new Error(`Database restore failed: ${err.message}`));
});
});
} catch (error: any) {
s.stop("Failed to restore database.");
log.warning("Failed to restore database: " + error.message);
throw new Error(`Database restore failed: ${error.message}`);
}
}
export async function updateWorkerImage() {
const { TRIGGER_DB, TRIGGER_TASKS_IMAGE } = env;
if (!TRIGGER_DB) {
throw new Error("TRIGGER_DB environment variable is missing");
}
const connectionString = getDatabaseUrl(TRIGGER_DB);
const knex = Knex({
client: "pg",
connection: connectionString,
});
const s = spinner();
s.start("Updating worker image reference...");
try {
// Get the first record from WorkerDeployment table
const firstWorkerDeployment = await knex("WorkerDeployment").select("id").first();
if (!firstWorkerDeployment) {
s.stop("No WorkerDeployment records found, skipping image update");
note("No WorkerDeployment records found, skipping image update");
return;
}
// Update the imageReference column with the TRIGGER_TASKS_IMAGE value
await knex("WorkerDeployment").where("id", firstWorkerDeployment.id).update({
imageReference: TRIGGER_TASKS_IMAGE,
updatedAt: new Date(),
});
s.stop(`Successfully updated worker image reference to: ${TRIGGER_TASKS_IMAGE}`);
} catch (error: any) {
s.stop("Failed to update worker image.");
log.warning("Failed to update worker image: " + error.message);
throw new Error(`Worker image update failed: ${error.message}`);
} finally {
await knex.destroy();
}
}

Binary file not shown.


@ -1,40 +0,0 @@
{
"include": ["./src/**/*.ts"],
"exclude": ["./src/**/*.test.ts"],
"compilerOptions": {
"target": "es2022",
"lib": ["ES2022", "DOM", "DOM.Iterable", "DOM.AsyncIterable"],
"module": "NodeNext",
"moduleResolution": "NodeNext",
"moduleDetection": "force",
"verbatimModuleSyntax": false,
"jsx": "react",
"strict": true,
"alwaysStrict": true,
"strictPropertyInitialization": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"noUnusedLocals": false,
"noUnusedParameters": false,
"noImplicitAny": true,
"noImplicitReturns": true,
"noImplicitThis": true,
"noFallthroughCasesInSwitch": true,
"resolveJsonModule": true,
"removeComments": false,
"esModuleInterop": true,
"emitDecoratorMetadata": false,
"experimentalDecorators": false,
"downlevelIteration": true,
"isolatedModules": true,
"noUncheckedIndexedAccess": true,
"pretty": true,
"isolatedDeclarations": false,
"composite": true,
"sourceMap": true
}
}


@ -1,8 +0,0 @@
import { configDefaults, defineConfig } from "vitest/config";
export default defineConfig({
test: {
globals: true,
exclude: [...configDefaults.exclude, "e2e/**/*"],
},
});


@ -0,0 +1,299 @@
# BERT Topic Modeling CLI for Echo Episodes
This CLI tool performs topic modeling on Echo episodes using BERTopic. It connects to Neo4j, retrieves episodes with their pre-computed embeddings for a given user, and discovers thematic clusters using HDBSCAN clustering.
## Features
- Connects to Neo4j database to fetch episodes
- Uses pre-computed embeddings (no need to regenerate them)
- Performs semantic topic clustering with BERTopic
- Displays topics with:
- Top keywords per topic
- Episode count per topic
- Sample episodes for each topic
- Configurable minimum topic size
- Environment variable support for easy configuration
## Prerequisites
- Python 3.8+
- Access to Neo4j database with episodes stored
- Pre-computed embeddings stored in Neo4j (in `contentEmbedding` field)
## Installation
1. Navigate to the bert directory:
```bash
cd apps/webapp/app/bert
```
2. Install dependencies:
```bash
pip install -r requirements.txt
```
## Configuration
The CLI can read Neo4j connection details from:
1. **Environment variables** (recommended) - Create a `.env` file or export:
```bash
export NEO4J_URI=bolt://localhost:7687
export NEO4J_USERNAME=neo4j
export NEO4J_PASSWORD=your_password
```
2. **Command-line options** - Pass credentials directly as flags
3. **From project root** - The tool automatically loads `.env` from the project root
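For reference, a minimal sketch of how these settings resolve (assuming `python-dotenv`; the defaults mirror the options documented below, and explicit `--neo4j-*` flags override them):
```python
import os
from dotenv import load_dotenv

# Load .env from the working directory or an ancestor; variables already
# set in the environment are not overridden.
load_dotenv()

# Defaults mirror the documented CLI options; --neo4j-* flags, when passed,
# take precedence over these values.
neo4j_uri = os.getenv("NEO4J_URI", "bolt://localhost:7687")
neo4j_username = os.getenv("NEO4J_USERNAME", "neo4j")
neo4j_password = os.getenv("NEO4J_PASSWORD")  # required, no default

if not neo4j_password:
    raise SystemExit("Set NEO4J_PASSWORD (env var, .env, or --neo4j-password)")
```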
## Usage
### Basic Usage
Using environment variables (most common):
```bash
python main.py <user_id>
```
### Advanced Options
```bash
python main.py <user_id> [OPTIONS]
```
**Options:**
- `--min-topic-size INTEGER`: Minimum number of episodes per topic (default: 10)
- `--nr-topics INTEGER`: Target number of topics for reduction (optional)
- `--propose-spaces`: Generate space proposals using OpenAI (requires OPENAI_API_KEY)
- `--openai-api-key TEXT`: OpenAI API key for space proposals (or use OPENAI_API_KEY env var)
- `--json`: Output only final results in JSON format (suppresses all other output)
- `--neo4j-uri TEXT`: Neo4j connection URI (default: bolt://localhost:7687)
- `--neo4j-username TEXT`: Neo4j username (default: neo4j)
- `--neo4j-password TEXT`: Neo4j password (required)
### Examples
1. **Basic usage with environment variables:**
```bash
python main.py user-123
```
2. **Custom minimum topic size:**
```bash
python main.py user-123 --min-topic-size 10
```
3. **Explicit credentials:**
```bash
python main.py user-123 \
--neo4j-uri bolt://neo4j:7687 \
--neo4j-username neo4j \
--neo4j-password mypassword
```
4. **Using Docker compose Neo4j:**
```bash
python main.py user-123 \
--neo4j-uri bolt://localhost:7687 \
--neo4j-password 27192e6432564f4788d55c15131bd5ac
```
5. **With space proposals:**
```bash
python main.py user-123 --propose-spaces
```
6. **JSON output mode (for programmatic use):**
```bash
python main.py user-123 --json
```
7. **JSON output with space proposals:**
```bash
python main.py user-123 --propose-spaces --json
```
### Get Help
```bash
python main.py --help
```
## Output Formats
### Human-Readable Output (Default)
The CLI outputs:
```
================================================================================
BERT TOPIC MODELING FOR ECHO EPISODES
================================================================================
User ID: user-123
Min Topic Size: 20
================================================================================
✓ Connected to Neo4j at bolt://localhost:7687
✓ Fetched 150 episodes with embeddings
🔍 Running BERTopic analysis (min_topic_size=20)...
✓ Topic modeling complete
================================================================================
TOPIC MODELING RESULTS
================================================================================
Total Topics Found: 5
Total Episodes: 150
================================================================================
────────────────────────────────────────────────────────────────────────────────
Topic 0: 45 episodes
────────────────────────────────────────────────────────────────────────────────
Keywords: authentication, login, user, security, session, password, token, oauth, jwt, credentials
Sample Episodes (showing up to 3):
1. [uuid-123]
Discussing authentication flow for the new user login system...
2. [uuid-456]
Implementing OAuth2 with JWT tokens for secure sessions...
3. [uuid-789]
Password reset functionality with email verification...
────────────────────────────────────────────────────────────────────────────────
Topic 1: 32 episodes
────────────────────────────────────────────────────────────────────────────────
Keywords: database, neo4j, query, graph, cypher, nodes, relationships, index, performance, optimization
Sample Episodes (showing up to 3):
...
Topic -1 (Outliers): 8 episodes
================================================================================
✓ Analysis complete!
================================================================================
✓ Neo4j connection closed
```
### JSON Output Mode (--json flag)
When using the `--json` flag, the tool outputs only a clean JSON object with no debug logs:
```json
{
"topics": {
"0": {
"keywords": ["authentication", "login", "user", "security", "session"],
"episodeIds": ["uuid-123", "uuid-456", "uuid-789"]
},
"1": {
"keywords": ["database", "neo4j", "query", "graph", "cypher"],
"episodeIds": ["uuid-abc", "uuid-def"]
}
},
"spaces": [
{
"name": "User Authentication",
"intent": "Episodes about user authentication, login systems, and security belong in this space.",
"confidence": 85,
"topics": [0, 3],
"estimatedEpisodes": 120
}
]
}
```
**JSON Output Structure:**
- `topics`: Dictionary of topic IDs with keywords and episode UUIDs
- `spaces`: Array of space proposals (only if `--propose-spaces` is used)
- `name`: Space name (2-5 words)
- `intent`: Classification intent (1-2 sentences)
- `confidence`: Confidence score (0-100)
- `topics`: Source topic IDs that form this space
- `estimatedEpisodes`: Estimated number of episodes in this space
**Use Cases for JSON Mode:**
- Programmatic consumption by other tools
- Piping output to jq or other JSON processors
- Integration with CI/CD pipelines
- Automated space creation workflows
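For example, a minimal sketch of programmatic consumption (the script path and user ID are placeholders for your own values):
```python
import json
import subprocess

# Run the CLI in --json mode and parse its output; "user-123" is a placeholder.
proc = subprocess.run(
    ["python", "main.py", "user-123", "--json"],
    capture_output=True,
    text=True,
    check=True,
)
result = json.loads(proc.stdout)

for topic_id, topic in result["topics"].items():
    print(topic_id, topic["keywords"][:5], f"{len(topic['episodeIds'])} episodes")
```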
## How It Works
1. **Connection**: Establishes connection to Neo4j database
2. **Data Fetching**: Queries all episodes for the given userId that have:
- Non-null `contentEmbedding` field
- Non-empty content
3. **Topic Modeling**: Runs BERTopic with:
- Pre-computed embeddings (no re-embedding needed)
- HDBSCAN clustering (automatic cluster discovery)
- Keyword extraction via c-TF-IDF
4. **Results**: Displays topics with keywords and sample episodes
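The heart of step 3 is handing BERTopic existing vectors instead of letting it embed the documents itself. A minimal, self-contained sketch with placeholder data (the CLI layers UMAP, stopword filtering, and c-TF-IDF tuning on top of this):
```python
import numpy as np
from bertopic import BERTopic
from hdbscan import HDBSCAN

# Placeholder documents and vectors; in the CLI these come from Neo4j
# (`content` and `contentEmbedding`).
docs = [f"episode text {i}" for i in range(100)]
embeddings = np.random.rand(len(docs), 384).astype(np.float32)

model = BERTopic(
    embedding_model=None,                        # skip re-embedding entirely
    hdbscan_model=HDBSCAN(min_cluster_size=10),  # analogous to --min-topic-size
)
topics, probs = model.fit_transform(docs, embeddings=embeddings)
print(model.get_topic_info())
```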
## Neo4j Query
The tool uses this Cypher query to fetch episodes:
```cypher
MATCH (e:Episode {userId: $userId})
WHERE e.contentEmbedding IS NOT NULL
AND size(e.contentEmbedding) > 0
AND e.content IS NOT NULL
AND e.content <> ''
RETURN e.uuid as uuid,
e.content as content,
e.contentEmbedding as embedding,
e.createdAt as createdAt
ORDER BY e.createdAt DESC
```
## Tuning Parameters
- **`--min-topic-size`**:
- Smaller values (5-10): More granular topics, may include noise
- Larger values (20-30): Broader topics, more coherent but fewer clusters
- Recommended: Start with 20 and adjust based on your data
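One rough way to pick a value is to refit on the same pre-computed embeddings with a few candidate sizes and compare the resulting topic and outlier counts. A hedged sketch (the helper name is ours; `contents` and `embeddings` are whatever you fetched from Neo4j):
```python
from bertopic import BERTopic
from hdbscan import HDBSCAN

def count_topics(contents, embeddings, size):
    """Fit BERTopic once with the given minimum cluster size and return
    (non-outlier topic count, outlier document count)."""
    model = BERTopic(
        embedding_model=None,
        hdbscan_model=HDBSCAN(min_cluster_size=size, prediction_data=True),
    )
    topics, _ = model.fit_transform(contents, embeddings=embeddings)
    return len(set(topics)) - (1 if -1 in topics else 0), sum(t == -1 for t in topics)

# Example sweep over candidate sizes:
# for size in (5, 10, 20, 30):
#     print(size, count_topics(contents, embeddings, size))
```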
## Troubleshooting
### No episodes found
- Verify the userId exists in Neo4j
- Check that episodes have `contentEmbedding` populated
- Ensure episodes have non-empty `content` field
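A quick way to check the first two points directly (a sketch using the same driver and field names as this tool; the credentials and user ID are placeholders):
```python
from neo4j import GraphDatabase

# Count this user's episodes and how many carry a non-empty contentEmbedding.
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "your_password"))
with driver.session() as session:
    record = session.run(
        """
        MATCH (e:Episode {userId: $userId})
        RETURN count(e) AS total,
               count(CASE WHEN e.contentEmbedding IS NOT NULL
                          AND size(e.contentEmbedding) > 0 THEN 1 END) AS withEmbeddings
        """,
        userId="user-123",
    ).single()
driver.close()
print(record["total"], "episodes,", record["withEmbeddings"], "with embeddings")
```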
### Connection errors
- Verify Neo4j is running: `docker ps | grep neo4j`
- Check URI format: should be `bolt://host:port`
- Verify credentials are correct
### Too few/many topics
- Adjust `--min-topic-size` parameter
- Need more topics: decrease the value (e.g., `--min-topic-size 10`)
- Need fewer topics: increase the value (e.g., `--min-topic-size 30`)
## Dependencies
- `bertopic>=0.16.0` - Topic modeling
- `neo4j>=5.14.0` - Neo4j Python driver
- `click>=8.1.0` - CLI framework
- `numpy>=1.24.0` - Numerical operations
- `python-dotenv>=1.0.0` - Environment variable loading
## License
Part of the Echo project.


@ -0,0 +1,384 @@
#!/usr/bin/env python3
"""
BERT Topic Modeling CLI for Echo Episodes
This CLI tool connects to Neo4j, retrieves episodes with their embeddings for a given userId,
and performs topic modeling using BERTopic to discover thematic clusters.
"""
import os
import sys
import json
from typing import List, Tuple, Dict, Any
import click
import numpy as np
from neo4j import GraphDatabase
from bertopic import BERTopic
from bertopic.vectorizers import ClassTfidfTransformer
from dotenv import load_dotenv
from sklearn.feature_extraction.text import CountVectorizer
from umap import UMAP
from hdbscan import HDBSCAN
class Neo4jConnection:
"""Manages Neo4j database connection."""
def __init__(self, uri: str, username: str, password: str, quiet: bool = False):
"""Initialize Neo4j connection.
Args:
uri: Neo4j connection URI (e.g., bolt://localhost:7687)
username: Neo4j username
password: Neo4j password
quiet: If True, suppress output messages
"""
self.quiet = quiet
try:
self.driver = GraphDatabase.driver(uri, auth=(username, password))
# Verify connection
self.driver.verify_connectivity()
if not quiet:
click.echo(f"✓ Connected to Neo4j at {uri}")
except Exception as e:
if not quiet:
click.echo(f"✗ Failed to connect to Neo4j: {e}", err=True)
sys.exit(1)
def close(self):
"""Close the Neo4j connection."""
if self.driver:
self.driver.close()
if not self.quiet:
click.echo("✓ Neo4j connection closed")
def get_episodes_with_embeddings(self, user_id: str) -> Tuple[List[str], List[str], np.ndarray]:
"""Fetch all episodes with their embeddings for a given user.
Args:
user_id: The user ID to fetch episodes for
Returns:
Tuple of (episode_uuids, episode_contents, embeddings_array)
"""
query = """
MATCH (e:Episode {userId: $userId})
WHERE e.contentEmbedding IS NOT NULL
AND size(e.contentEmbedding) > 0
AND e.content IS NOT NULL
AND e.content <> ''
RETURN e.uuid as uuid,
e.content as content,
e.contentEmbedding as embedding,
e.createdAt as createdAt
ORDER BY e.createdAt DESC
"""
with self.driver.session() as session:
result = session.run(query, userId=user_id)
records = list(result)
if not records:
if not self.quiet:
click.echo(f"✗ No episodes found for userId: {user_id}", err=True)
sys.exit(1)
uuids = []
contents = []
embeddings = []
for record in records:
uuids.append(record["uuid"])
contents.append(record["content"])
embeddings.append(record["embedding"])
embeddings_array = np.array(embeddings, dtype=np.float32)
if not self.quiet:
click.echo(f"✓ Fetched {len(contents)} episodes with embeddings")
return uuids, contents, embeddings_array
def run_bertopic_analysis(
contents: List[str],
embeddings: np.ndarray,
min_topic_size: int = 20,
nr_topics: int = None,
quiet: bool = False
) -> Tuple[BERTopic, List[int], List[float]]:
"""Run BERTopic clustering on episode contents with improved configuration.
Args:
contents: List of episode content strings
embeddings: Pre-computed embeddings for the episodes
min_topic_size: Minimum number of documents per topic
nr_topics: Target number of topics (optional, for topic reduction)
quiet: If True, suppress output messages
Returns:
Tuple of (bertopic_model, topic_assignments, probabilities)
"""
if not quiet:
click.echo(f"\n🔍 Running BERTopic analysis (min_topic_size={min_topic_size})...")
# Step 1: Configure UMAP for dimensionality reduction
# More aggressive reduction helps find distinct clusters
umap_model = UMAP(
n_neighbors=15, # Balance between local/global structure
n_components=5, # Reduce to 5 dimensions
min_dist=0.0, # Allow tight clusters
metric='cosine', # Use cosine similarity
random_state=42
)
# Step 2: Configure HDBSCAN for clustering
# Tuned to find more granular topics
hdbscan_model = HDBSCAN(
min_cluster_size=min_topic_size, # Minimum episodes per topic
min_samples=5, # More sensitive to local density
metric='euclidean',
cluster_selection_method='eom', # Excess of mass method
prediction_data=True
)
# Step 3: Configure vectorizer with stopword removal
# Remove common English stopwords that pollute topic keywords
vectorizer_model = CountVectorizer(
stop_words='english', # Remove common English words
min_df=2, # Word must appear in at least 2 docs
max_df=0.95, # Ignore words in >95% of docs
ngram_range=(1, 2) # Include unigrams and bigrams
)
# Step 4: Configure c-TF-IDF with better parameters
ctfidf_model = ClassTfidfTransformer(
reduce_frequent_words=True, # Further reduce common words
bm25_weighting=True # Use BM25 for better keyword extraction
)
# Step 5: Initialize BERTopic with all custom components
model = BERTopic(
embedding_model=None, # Use pre-computed embeddings
umap_model=umap_model,
hdbscan_model=hdbscan_model,
vectorizer_model=vectorizer_model,
ctfidf_model=ctfidf_model,
top_n_words=15, # More keywords per topic
nr_topics=nr_topics, # Optional topic reduction
calculate_probabilities=True,
verbose=(not quiet)
)
# Fit the model with pre-computed embeddings
topics, probs = model.fit_transform(contents, embeddings=embeddings)
# Get topic count
unique_topics = len(set(topics)) - (1 if -1 in topics else 0)
if not quiet:
click.echo(f"✓ Topic modeling complete - Found {unique_topics} topics")
return model, topics, probs
def print_topic_results(
model: BERTopic,
topics: List[int],
uuids: List[str],
contents: List[str]
):
"""Print formatted topic results.
Args:
model: Fitted BERTopic model
topics: Topic assignments for each episode
uuids: Episode UUIDs
contents: Episode contents
"""
# Get topic info
topic_info = model.get_topic_info()
num_topics = len(topic_info) - 1 # Exclude outlier topic (-1)
click.echo(f"\n{'='*80}")
click.echo(f"TOPIC MODELING RESULTS")
click.echo(f"{'='*80}")
click.echo(f"Total Topics Found: {num_topics}")
click.echo(f"Total Episodes: {len(contents)}")
click.echo(f"{'='*80}\n")
# Print each topic
for idx, row in topic_info.iterrows():
topic_id = row['Topic']
count = row['Count']
# Skip outlier topic
if topic_id == -1:
click.echo(f"Topic -1 (Outliers): {count} episodes\n")
continue
# Get top words for this topic
topic_words = model.get_topic(topic_id)
click.echo(f"{''*80}")
click.echo(f"Topic {topic_id}: {count} episodes")
click.echo(f"{''*80}")
# Print top keywords
if topic_words:
keywords = [word for word, score in topic_words[:10]]
click.echo(f"Keywords: {', '.join(keywords)}")
# Print sample episodes
topic_episodes = [(uuid, content) for uuid, content, topic
in zip(uuids, contents, topics) if topic == topic_id]
click.echo(f"\nSample Episodes (showing up to 3):")
for i, (uuid, content) in enumerate(topic_episodes[:3]):
# Truncate content for display
truncated = content[:200] + "..." if len(content) > 200 else content
click.echo(f" {i+1}. [{uuid}]")
click.echo(f" {truncated}\n")
click.echo()
def build_json_output(
model: BERTopic,
topics: List[int],
uuids: List[str]
) -> Dict[str, Any]:
"""Build JSON output structure.
Args:
model: Fitted BERTopic model
topics: Topic assignments for each episode
uuids: Episode UUIDs
Returns:
Dictionary with topics data
"""
# Build topics dictionary
topics_dict = {}
topic_info = model.get_topic_info()
for idx, row in topic_info.iterrows():
topic_id = row['Topic']
# Skip outlier topic
if topic_id == -1:
continue
# Get keywords
topic_words = model.get_topic(topic_id)
keywords = [word for word, score in topic_words[:10]] if topic_words else []
# Get episode IDs for this topic
episode_ids = [uuid for uuid, topic in zip(uuids, topics) if topic == topic_id]
topics_dict[topic_id] = {
"keywords": keywords,
"episodeIds": episode_ids
}
return {"topics": topics_dict}
@click.command()
@click.argument('user_id', type=str)
@click.option(
'--min-topic-size',
default=10,
type=int,
help='Minimum number of episodes per topic (default: 10, lower = more granular topics)'
)
@click.option(
'--nr-topics',
default=None,
type=int,
help='Target number of topics for reduction (optional, e.g., 20 for ~20 topics)'
)
@click.option(
'--neo4j-uri',
envvar='NEO4J_URI',
default='bolt://localhost:7687',
help='Neo4j connection URI (default: bolt://localhost:7687)'
)
@click.option(
'--neo4j-username',
envvar='NEO4J_USERNAME',
default='neo4j',
help='Neo4j username (default: neo4j)'
)
@click.option(
'--neo4j-password',
envvar='NEO4J_PASSWORD',
required=True,
help='Neo4j password (required, can use NEO4J_PASSWORD env var)'
)
@click.option(
'--json',
'json_output',
is_flag=True,
default=False,
help='Output only final results in JSON format (suppresses all other output)'
)
def main(user_id: str, min_topic_size: int, nr_topics: int, neo4j_uri: str, neo4j_username: str, neo4j_password: str, json_output: bool):
"""
Run BERTopic analysis on episodes for a given USER_ID.
This tool connects to Neo4j, retrieves all episodes with embeddings for the specified user,
and performs topic modeling to discover thematic clusters.
Examples:
# Using environment variables from .env file
python main.py user-123
# With custom min topic size
python main.py user-123 --min-topic-size 10
# With explicit Neo4j credentials
python main.py user-123 --neo4j-uri bolt://localhost:7687 --neo4j-password mypassword
"""
# Print header only if not in JSON mode
if not json_output:
click.echo(f"\n{'='*80}")
click.echo("BERT TOPIC MODELING FOR ECHO EPISODES")
click.echo(f"{'='*80}")
click.echo(f"User ID: {user_id}")
click.echo(f"Min Topic Size: {min_topic_size}")
if nr_topics:
click.echo(f"Target Topics: ~{nr_topics}")
click.echo(f"{'='*80}\n")
# Connect to Neo4j (quiet mode if JSON output)
neo4j_conn = Neo4jConnection(neo4j_uri, neo4j_username, neo4j_password, quiet=json_output)
try:
# Fetch episodes with embeddings
uuids, contents, embeddings = neo4j_conn.get_episodes_with_embeddings(user_id)
# Run BERTopic analysis
model, topics, probs = run_bertopic_analysis(contents, embeddings, min_topic_size, nr_topics, quiet=json_output)
# Output results
if json_output:
# JSON output mode - only print JSON
output = build_json_output(model, topics, uuids)
click.echo(json.dumps(output, indent=2))
else:
# Normal output mode - print formatted results
print_topic_results(model, topics, uuids, contents)
click.echo(f"{'='*80}")
click.echo("✓ Analysis complete!")
click.echo(f"{'='*80}\n")
finally:
# Always close connection
neo4j_conn.close()
if __name__ == '__main__':
# Load environment variables from .env file if present
load_dotenv()
main()


@ -0,0 +1,8 @@
bertopic>=0.16.0
neo4j>=5.14.0
click>=8.1.0
numpy>=1.24.0
python-dotenv>=1.0.0
scikit-learn>=1.3.0
umap-learn>=0.5.4
hdbscan>=0.8.33


@ -92,3 +92,26 @@ export const sessionCompactionQueue = new Queue("session-compaction-queue", {
},
},
});
/**
* BERT topic analysis queue
* Handles CPU-intensive topic modeling on user episodes
*/
export const bertTopicQueue = new Queue("bert-topic-queue", {
connection: getRedisConnection(),
defaultJobOptions: {
attempts: 2, // Only 2 attempts due to long runtime
backoff: {
type: "exponential",
delay: 5000,
},
timeout: 300000, // 5 minute timeout
removeOnComplete: {
age: 7200, // Keep completed jobs for 2 hours
count: 100,
},
removeOnFail: {
age: 172800, // Keep failed jobs for 48 hours (for debugging)
},
},
});


@ -23,10 +23,15 @@ import {
processSessionCompaction,
type SessionCompactionPayload,
} from "~/jobs/session/session-compaction.logic";
import {
processTopicAnalysis,
type TopicAnalysisPayload,
} from "~/jobs/bert/topic-analysis.logic";
import {
enqueueIngestEpisode,
enqueueSpaceAssignment,
enqueueSessionCompaction,
enqueueBertTopicAnalysis,
} from "~/lib/queue-adapter.server";
import { logger } from "~/services/logger.service";
@ -47,6 +52,7 @@ export const ingestWorker = new Worker(
// Callbacks to enqueue follow-up jobs
enqueueSpaceAssignment,
enqueueSessionCompaction,
enqueueBertTopicAnalysis,
);
},
{
@ -108,6 +114,22 @@ export const sessionCompactionWorker = new Worker(
},
);
/**
* BERT topic analysis worker
* Handles CPU-intensive topic modeling
*/
export const bertTopicWorker = new Worker(
"bert-topic-queue",
async (job) => {
const payload = job.data as TopicAnalysisPayload;
return await processTopicAnalysis(payload);
},
{
connection: getRedisConnection(),
concurrency: 2, // Process up to 2 analyses in parallel (CPU-intensive)
},
);
/**
* Graceful shutdown handler
*/
@ -116,8 +138,8 @@ export async function closeAllWorkers(): Promise<void> {
ingestWorker.close(),
documentIngestWorker.close(),
conversationTitleWorker.close(),
sessionCompactionWorker.close(),
bertTopicWorker.close(),
]);
logger.log("All BullMQ workers closed");
}


@ -0,0 +1,116 @@
import { exec } from "child_process";
import { promisify } from "util";
const execAsync = promisify(exec);
export interface TopicAnalysisPayload {
userId: string;
minTopicSize?: number;
nrTopics?: number;
}
export interface TopicAnalysisResult {
topics: {
[topicId: string]: {
keywords: string[];
episodeIds: string[];
};
};
}
/**
* Process BERT topic analysis on user's episodes
* This is the common logic shared between Trigger.dev and BullMQ
*
* NOTE: This function does NOT update workspace.metadata.lastTopicAnalysisAt
* That should be done by the caller BEFORE enqueueing this job to prevent
* duplicate analyses caused by race conditions.
*/
export async function processTopicAnalysis(
payload: TopicAnalysisPayload
): Promise<TopicAnalysisResult> {
const { userId, minTopicSize = 10, nrTopics } = payload;
console.log(`[BERT Topic Analysis] Starting analysis for user: ${userId}`);
console.log(
`[BERT Topic Analysis] Parameters: minTopicSize=${minTopicSize}, nrTopics=${nrTopics || "auto"}`
);
// Build the command
let command = `python3 /core/apps/webapp/app/bert/main.py ${userId} --json`;
if (minTopicSize) {
command += ` --min-topic-size ${minTopicSize}`;
}
if (nrTopics) {
command += ` --nr-topics ${nrTopics}`;
}
console.log(`[BERT Topic Analysis] Executing: ${command}`);
try {
const startTime = Date.now();
// Execute the Python script with a 5-minute timeout
const { stdout, stderr } = await execAsync(command, {
timeout: 300000, // 5 minutes
maxBuffer: 10 * 1024 * 1024, // 10MB buffer for large outputs
});
const duration = Date.now() - startTime;
console.log(`[BERT Topic Analysis] Completed in ${duration}ms`);
if (stderr) {
console.warn(`[BERT Topic Analysis] Warnings:`, stderr);
}
// Parse the JSON output
const result: TopicAnalysisResult = JSON.parse(stdout);
// Log summary
const topicCount = Object.keys(result.topics).length;
const totalEpisodes = Object.values(result.topics).reduce(
(sum, topic) => sum + topic.episodeIds.length,
0
);
console.log(
`[BERT Topic Analysis] Found ${topicCount} topics covering ${totalEpisodes} episodes`
);
return result;
} catch (error) {
console.error(`[BERT Topic Analysis] Error:`, error);
if (error instanceof Error) {
// Check for timeout
if (error.message.includes("ETIMEDOUT")) {
throw new Error(
`Topic analysis timed out after 5 minutes. User may have too many episodes.`
);
}
// Check for Python errors
if (error.message.includes("python3: not found")) {
throw new Error(
`Python 3 is not installed or not available in PATH.`
);
}
// Check for Neo4j connection errors
if (error.message.includes("Failed to connect to Neo4j")) {
throw new Error(
`Could not connect to Neo4j. Check NEO4J_URI and credentials.`
);
}
// Check for no episodes
if (error.message.includes("No episodes found")) {
throw new Error(`No episodes found for userId: ${userId}`);
}
}
throw error;
}
}


@ -7,6 +7,10 @@ import { prisma } from "~/trigger/utils/prisma";
import { EpisodeType } from "@core/types";
import { deductCredits, hasCredits } from "~/trigger/utils/utils";
import { assignEpisodesToSpace } from "~/services/graphModels/space";
import {
shouldTriggerTopicAnalysis,
updateLastTopicAnalysisTime,
} from "~/services/bertTopicAnalysis.server";
export const IngestBodyRequest = z.object({
episodeBody: z.string(),
@ -55,6 +59,11 @@ export async function processEpisodeIngestion(
sessionId: string;
source: string;
}) => Promise<any>,
enqueueBertTopicAnalysis?: (params: {
userId: string;
minTopicSize?: number;
nrTopics?: number;
}) => Promise<any>,
): Promise<IngestEpisodeResult> {
try {
logger.log(`Processing job for user ${payload.userId}`);
@ -250,6 +259,43 @@ export async function processEpisodeIngestion(
});
}
// Auto-trigger BERT topic analysis if threshold met (20+ new episodes)
try {
if (
currentStatus === IngestionStatus.COMPLETED &&
enqueueBertTopicAnalysis
) {
const shouldTrigger = await shouldTriggerTopicAnalysis(
payload.userId,
payload.workspaceId,
);
if (shouldTrigger) {
logger.info(
`Triggering BERT topic analysis after reaching 20+ new episodes`,
{
userId: payload.userId,
workspaceId: payload.workspaceId,
},
);
await enqueueBertTopicAnalysis({
userId: payload.userId,
minTopicSize: 10,
});
// Update the last analysis timestamp
await updateLastTopicAnalysisTime(payload.workspaceId);
}
}
} catch (topicAnalysisError) {
// Don't fail the ingestion if topic analysis fails
logger.warn(`Failed to trigger topic analysis after ingestion:`, {
error: topicAnalysisError,
userId: payload.userId,
});
}
return { success: true, episodeDetails };
} catch (err: any) {
await prisma.ingestionQueue.update({


@ -163,6 +163,39 @@ export async function enqueueSpaceAssignment(
}
}
/**
* Enqueue BERT topic analysis job
*/
export async function enqueueBertTopicAnalysis(payload: {
userId: string;
minTopicSize?: number;
nrTopics?: number;
}): Promise<{ id?: string }> {
const provider = env.QUEUE_PROVIDER as QueueProvider;
if (provider === "trigger") {
const { bertTopicAnalysisTask } = await import(
"~/trigger/bert/topic-analysis"
);
const handler = await bertTopicAnalysisTask.trigger(payload, {
queue: "bert-topic-analysis",
concurrencyKey: payload.userId,
tags: [payload.userId, "bert-analysis"],
});
return { id: handler.id };
} else {
// BullMQ
const { bertTopicQueue } = await import("~/bullmq/queues");
const job = await bertTopicQueue.add("topic-analysis", payload, {
jobId: `bert-${payload.userId}-${Date.now()}`,
attempts: 2, // Only 2 attempts for expensive operations
backoff: { type: "exponential", delay: 5000 },
timeout: 300000, // 5 minute timeout
});
return { id: job.id };
}
}
export const isTriggerDeployment = () => {
return env.QUEUE_PROVIDER === "trigger";
};


@ -0,0 +1,107 @@
import { prisma } from "~/trigger/utils/prisma";
import { logger } from "~/services/logger.service";
import { runQuery } from "~/lib/neo4j.server";
interface WorkspaceMetadata {
lastTopicAnalysisAt?: string;
[key: string]: any;
}
/**
* Check if we should trigger a BERT topic analysis for this workspace
* Criteria: 20+ new episodes since last analysis (or no previous analysis)
*/
export async function shouldTriggerTopicAnalysis(
userId: string,
workspaceId: string,
): Promise<boolean> {
try {
// Get workspace metadata
const workspace = await prisma.workspace.findUnique({
where: { id: workspaceId },
select: { metadata: true },
});
if (!workspace) {
logger.warn(`Workspace not found: ${workspaceId}`);
return false;
}
const metadata = (workspace.metadata || {}) as WorkspaceMetadata;
const lastAnalysisAt = metadata.lastTopicAnalysisAt;
// Count episodes since last analysis
const query = lastAnalysisAt
? `
MATCH (e:Episode {userId: $userId})
WHERE e.createdAt > datetime($lastAnalysisAt)
RETURN count(e) as newEpisodeCount
`
: `
MATCH (e:Episode {userId: $userId})
RETURN count(e) as totalEpisodeCount
`;
const result = await runQuery(query, {
userId,
lastAnalysisAt,
});
const episodeCount = lastAnalysisAt
? result[0]?.get("newEpisodeCount")?.toNumber() || 0
: result[0]?.get("totalEpisodeCount")?.toNumber() || 0;
logger.info(
`[Topic Analysis Check] User: ${userId}, New episodes: ${episodeCount}, Last analysis: ${lastAnalysisAt || "never"}`,
);
// Trigger if 20+ new episodes
return episodeCount >= 20;
} catch (error) {
logger.error(
`[Topic Analysis Check] Error checking episode count:`,
error,
);
return false;
}
}
/**
* Update workspace metadata with last topic analysis timestamp
*/
export async function updateLastTopicAnalysisTime(
workspaceId: string,
): Promise<void> {
try {
const workspace = await prisma.workspace.findUnique({
where: { id: workspaceId },
select: { metadata: true },
});
if (!workspace) {
logger.warn(`Workspace not found: ${workspaceId}`);
return;
}
const metadata = (workspace.metadata || {}) as WorkspaceMetadata;
await prisma.workspace.update({
where: { id: workspaceId },
data: {
metadata: {
...metadata,
lastTopicAnalysisAt: new Date().toISOString(),
},
},
});
logger.info(
`[Topic Analysis] Updated last analysis timestamp for workspace: ${workspaceId}`,
);
} catch (error) {
logger.error(
`[Topic Analysis] Error updating last analysis timestamp:`,
error,
);
}
}


@ -0,0 +1,21 @@
import { task } from "@trigger.dev/sdk/v3";
import {
processTopicAnalysis,
type TopicAnalysisPayload,
} from "~/jobs/bert/topic-analysis.logic";
/**
* Trigger.dev task for BERT topic analysis
*
* This is a thin wrapper around the common logic in jobs/bert/topic-analysis.logic.ts
*/
export const bertTopicAnalysisTask = task({
id: "bert-topic-analysis",
queue: {
name: "bert-topic-analysis",
concurrencyLimit: 3, // Max 3 parallel analyses to avoid CPU overload
},
run: async (payload: TopicAnalysisPayload) => {
return await processTopicAnalysis(payload);
},
});


@ -6,6 +6,7 @@ import {
} from "~/jobs/ingest/ingest-episode.logic";
import { triggerSpaceAssignment } from "../spaces/space-assignment";
import { triggerSessionCompaction } from "../session/session-compaction";
import { bertTopicAnalysisTask } from "../bert/topic-analysis";
const ingestionQueue = queue({
name: "ingestion-queue",
@ -32,6 +33,14 @@ export const ingestTask = task({
async (params) => {
await triggerSessionCompaction(params);
},
// Callback for BERT topic analysis
async (params) => {
await bertTopicAnalysisTask.trigger(params, {
queue: "bert-topic-analysis",
concurrencyKey: params.userId,
tags: [params.userId, "bert-analysis"],
});
},
);
},
});


@ -1,557 +0,0 @@
import { task } from "@trigger.dev/sdk/v3";
import { logger } from "~/services/logger.service";
import { makeModelCall } from "~/lib/model.server";
import { runQuery } from "~/lib/neo4j.server";
import type { CoreMessage } from "ai";
import { z } from "zod";
import {
EXPLICIT_PATTERN_TYPES,
IMPLICIT_PATTERN_TYPES,
type SpacePattern,
type PatternDetectionResult,
} from "@core/types";
import { createSpacePattern, getSpace } from "../utils/space-utils";
interface SpacePatternPayload {
userId: string;
workspaceId: string;
spaceId: string;
triggerSource?:
| "summary_complete"
| "manual"
| "assignment"
| "scheduled"
| "new_space"
| "growth_threshold"
| "ingestion_complete";
}
interface SpaceStatementData {
uuid: string;
fact: string;
subject: string;
predicate: string;
object: string;
createdAt: Date;
validAt: Date;
content?: string; // For implicit pattern analysis
}
interface SpaceThemeData {
themes: string[];
summary: string;
}
// Zod schemas for LLM response validation
const ExplicitPatternSchema = z.object({
name: z.string(),
type: z.string(),
summary: z.string(),
evidence: z.array(z.string()),
confidence: z.number().min(0).max(1),
});
const ImplicitPatternSchema = z.object({
name: z.string(),
type: z.string(),
summary: z.string(),
evidence: z.array(z.string()),
confidence: z.number().min(0).max(1),
});
const PatternAnalysisSchema = z.object({
explicitPatterns: z.array(ExplicitPatternSchema),
implicitPatterns: z.array(ImplicitPatternSchema),
});
const CONFIG = {
minStatementsForPatterns: 5,
maxPatternsPerSpace: 20,
minPatternConfidence: 0.85,
};
export const spacePatternTask = task({
id: "space-pattern",
run: async (payload: SpacePatternPayload) => {
const { userId, workspaceId, spaceId, triggerSource = "manual" } = payload;
logger.info(`Starting space pattern detection`, {
userId,
workspaceId,
spaceId,
triggerSource,
});
try {
// Get space data and check if it has enough content
const space = await getSpaceForPatternAnalysis(spaceId);
if (!space) {
return {
success: false,
spaceId,
error: "Space not found or insufficient data",
};
}
// Get statements for pattern analysis
const statements = await getSpaceStatementsForPatterns(spaceId, userId);
if (statements.length < CONFIG.minStatementsForPatterns) {
logger.info(
`Space ${spaceId} has insufficient statements (${statements.length}) for pattern detection`,
);
return {
success: true,
spaceId,
triggerSource,
patterns: {
explicitPatterns: [],
implicitPatterns: [],
totalPatternsFound: 0,
},
};
}
// Detect patterns
const patternResult = await detectSpacePatterns(space, statements);
if (patternResult) {
// Store patterns
await storePatterns(
patternResult.explicitPatterns,
patternResult.implicitPatterns,
spaceId,
);
logger.info(`Generated patterns for space ${spaceId}`, {
explicitPatterns: patternResult.explicitPatterns.length,
implicitPatterns: patternResult.implicitPatterns.length,
totalPatterns: patternResult.totalPatternsFound,
triggerSource,
});
return {
success: true,
spaceId,
triggerSource,
patterns: {
explicitPatterns: patternResult.explicitPatterns.length,
implicitPatterns: patternResult.implicitPatterns.length,
totalPatternsFound: patternResult.totalPatternsFound,
},
};
} else {
logger.warn(`Failed to detect patterns for space ${spaceId}`);
return {
success: false,
spaceId,
triggerSource,
error: "Failed to detect patterns",
};
}
} catch (error) {
logger.error(
`Error in space pattern detection for space ${spaceId}:`,
error as Record<string, unknown>,
);
throw error;
}
},
});
async function getSpaceForPatternAnalysis(
spaceId: string,
): Promise<SpaceThemeData | null> {
try {
const space = await getSpace(spaceId);
if (!space || !space.themes || space.themes.length === 0) {
logger.warn(
`Space ${spaceId} not found or has no themes for pattern analysis`,
);
return null;
}
return {
themes: space.themes,
summary: space.summary || "",
};
} catch (error) {
logger.error(
`Error getting space for pattern analysis:`,
error as Record<string, unknown>,
);
return null;
}
}
async function getSpaceStatementsForPatterns(
spaceId: string,
userId: string,
): Promise<SpaceStatementData[]> {
const query = `
MATCH (s:Statement)
WHERE s.userId = $userId
AND s.spaceIds IS NOT NULL
AND $spaceId IN s.spaceIds
AND s.invalidAt IS NULL
MATCH (s)-[:HAS_SUBJECT]->(subj:Entity)
MATCH (s)-[:HAS_PREDICATE]->(pred:Entity)
MATCH (s)-[:HAS_OBJECT]->(obj:Entity)
RETURN s, subj.name as subject, pred.name as predicate, obj.name as object
ORDER BY s.createdAt DESC
`;
const result = await runQuery(query, {
spaceId,
userId,
});
return result.map((record) => {
const statement = record.get("s").properties;
return {
uuid: statement.uuid,
fact: statement.fact,
subject: record.get("subject"),
predicate: record.get("predicate"),
object: record.get("object"),
createdAt: new Date(statement.createdAt),
validAt: new Date(statement.validAt),
content: statement.fact, // Use fact as content for implicit analysis
};
});
}
async function detectSpacePatterns(
space: SpaceThemeData,
statements: SpaceStatementData[],
): Promise<PatternDetectionResult | null> {
try {
// Extract explicit patterns from themes
const explicitPatterns = await extractExplicitPatterns(
space.themes,
space.summary,
statements,
);
// Extract implicit patterns from statement analysis
const implicitPatterns = await extractImplicitPatterns(statements);
return {
explicitPatterns,
implicitPatterns,
totalPatternsFound: explicitPatterns.length + implicitPatterns.length,
processingStats: {
statementsAnalyzed: statements.length,
themesProcessed: space.themes.length,
implicitPatternsExtracted: implicitPatterns.length,
},
};
} catch (error) {
logger.error(
"Error detecting space patterns:",
error as Record<string, unknown>,
);
return null;
}
}
async function extractExplicitPatterns(
themes: string[],
summary: string,
statements: SpaceStatementData[],
): Promise<Omit<SpacePattern, "id" | "createdAt" | "updatedAt" | "spaceId">[]> {
if (themes.length === 0) return [];
const prompt = createExplicitPatternPrompt(themes, summary, statements);
// Pattern extraction requires HIGH complexity (insight synthesis, pattern recognition)
let responseText = "";
await makeModelCall(false, prompt, (text: string) => {
responseText = text;
}, undefined, "high");
const patterns = parseExplicitPatternResponse(responseText);
return patterns.map((pattern) => ({
name: pattern.name || `${pattern.type} pattern`,
source: "explicit" as const,
type: pattern.type,
summary: pattern.summary,
evidence: pattern.evidence,
confidence: pattern.confidence,
userConfirmed: "pending" as const,
}));
}
async function extractImplicitPatterns(
statements: SpaceStatementData[],
): Promise<Omit<SpacePattern, "id" | "createdAt" | "updatedAt" | "spaceId">[]> {
if (statements.length < CONFIG.minStatementsForPatterns) return [];
const prompt = createImplicitPatternPrompt(statements);
// Implicit pattern discovery requires HIGH complexity (pattern recognition from statements)
let responseText = "";
await makeModelCall(false, prompt, (text: string) => {
responseText = text;
}, undefined, "high");
const patterns = parseImplicitPatternResponse(responseText);
return patterns.map((pattern) => ({
name: pattern.name || `${pattern.type} pattern`,
source: "implicit" as const,
type: pattern.type,
summary: pattern.summary,
evidence: pattern.evidence,
confidence: pattern.confidence,
userConfirmed: "pending" as const,
}));
}
function createExplicitPatternPrompt(
themes: string[],
summary: string,
statements: SpaceStatementData[],
): CoreMessage[] {
const statementsText = statements
.map((stmt) => `[${stmt.uuid}] ${stmt.fact}`)
.join("\n");
const explicitTypes = Object.values(EXPLICIT_PATTERN_TYPES).join('", "');
return [
{
role: "system",
content: `You are an expert at extracting structured patterns from themes and supporting evidence.
Your task is to convert high-level themes into explicit patterns with supporting statement evidence.
INSTRUCTIONS:
1. For each theme, create a pattern that explains what it reveals about the user
2. Give each pattern a short, descriptive name (2-4 words)
3. Find supporting statement IDs that provide evidence for each pattern
4. Assess confidence based on evidence strength and theme clarity
5. Use appropriate pattern types from these guidelines: "${explicitTypes}"
- "theme": High-level thematic content areas
- "topic": Specific subject matter or topics of interest
- "domain": Knowledge or work domains the user operates in
- "interest_area": Areas of personal interest or hobby
6. You may suggest new pattern types if none of the guidelines fit well
RESPONSE FORMAT:
Provide your response inside <output></output> tags with valid JSON.
<output>
{
"explicitPatterns": [
{
"name": "Short descriptive name for the pattern",
"type": "theme",
"summary": "Description of what this pattern reveals about the user",
"evidence": ["statement_id_1", "statement_id_2"],
"confidence": 0.85
}
]
}
</output>`,
},
{
role: "user",
content: `THEMES TO ANALYZE:
${themes.map((theme, i) => `${i + 1}. ${theme}`).join("\n")}
SPACE SUMMARY:
${summary}
SUPPORTING STATEMENTS:
${statementsText}
Please extract explicit patterns from these themes and map them to supporting statement evidence.`,
},
];
}
function createImplicitPatternPrompt(
statements: SpaceStatementData[],
): CoreMessage[] {
const statementsText = statements
.map(
(stmt) =>
`[${stmt.uuid}] ${stmt.fact} (${stmt.subject} ${stmt.predicate} ${stmt.object})`,
)
.join("\n");
const implicitTypes = Object.values(IMPLICIT_PATTERN_TYPES).join('", "');
return [
{
role: "system",
content: `You are an expert at discovering implicit behavioral patterns from statement analysis.
Your task is to identify hidden patterns in user behavior, preferences, and habits from statement content.
INSTRUCTIONS:
1. Analyze statement content for behavioral patterns, not explicit topics
2. Give each pattern a short, descriptive name (2-4 words)
3. Look for recurring behaviors, preferences, and working styles
4. Identify how the user approaches tasks, makes decisions, and interacts
5. Use appropriate pattern types from these guidelines: "${implicitTypes}"
- "preference": Personal preferences and choices
- "habit": Recurring behaviors and routines
- "workflow": Work and process patterns
- "communication_style": How user communicates and expresses ideas
- "decision_pattern": Decision-making approaches and criteria
- "temporal_pattern": Time-based behavioral patterns
- "behavioral_pattern": General behavioral tendencies
- "learning_style": How user learns and processes information
- "collaboration_style": How user works with others
6. You may suggest new pattern types if none of the guidelines fit well
7. Focus on what the statements reveal about how the user thinks, works, or behaves
RESPONSE FORMAT:
Provide your response inside <output></output> tags with valid JSON.
<output>
{
"implicitPatterns": [
{
"name": "Short descriptive name for the pattern",
"type": "preference",
"summary": "Description of what this behavioral pattern reveals",
"evidence": ["statement_id_1", "statement_id_2"],
"confidence": 0.75
}
]
}
</output>`,
},
{
role: "user",
content: `STATEMENTS TO ANALYZE FOR IMPLICIT PATTERNS:
${statementsText}
Please identify implicit behavioral patterns, preferences, and habits from these statements.`,
},
];
}
function parseExplicitPatternResponse(response: string): Array<{
name: string;
type: string;
summary: string;
evidence: string[];
confidence: number;
}> {
try {
const outputMatch = response.match(/<output>([\s\S]*?)<\/output>/);
if (!outputMatch) {
logger.warn("No <output> tags found in explicit pattern response");
return [];
}
const parsed = JSON.parse(outputMatch[1].trim());
const validationResult = z
.object({
explicitPatterns: z.array(ExplicitPatternSchema),
})
.safeParse(parsed);
if (!validationResult.success) {
logger.warn("Invalid explicit pattern response format:", {
error: validationResult.error,
});
return [];
}
return validationResult.data.explicitPatterns.filter(
(p) =>
p.confidence >= CONFIG.minPatternConfidence && p.evidence.length >= 3, // Ensure at least 3 evidence statements
);
} catch (error) {
logger.error(
"Error parsing explicit pattern response:",
error as Record<string, unknown>,
);
return [];
}
}
function parseImplicitPatternResponse(response: string): Array<{
name: string;
type: string;
summary: string;
evidence: string[];
confidence: number;
}> {
try {
const outputMatch = response.match(/<output>([\s\S]*?)<\/output>/);
if (!outputMatch) {
logger.warn("No <output> tags found in implicit pattern response");
return [];
}
const parsed = JSON.parse(outputMatch[1].trim());
const validationResult = z
.object({
implicitPatterns: z.array(ImplicitPatternSchema),
})
.safeParse(parsed);
if (!validationResult.success) {
logger.warn("Invalid implicit pattern response format:", {
error: validationResult.error,
});
return [];
}
return validationResult.data.implicitPatterns.filter(
(p) =>
p.confidence >= CONFIG.minPatternConfidence && p.evidence.length >= 3, // Ensure at least 3 evidence statements
);
} catch (error) {
logger.error(
"Error parsing implicit pattern response:",
error as Record<string, unknown>,
);
return [];
}
}
async function storePatterns(
explicitPatterns: Omit<
SpacePattern,
"id" | "createdAt" | "updatedAt" | "spaceId"
>[],
implicitPatterns: Omit<
SpacePattern,
"id" | "createdAt" | "updatedAt" | "spaceId"
>[],
spaceId: string,
): Promise<void> {
try {
const allPatterns = [...explicitPatterns, ...implicitPatterns];
if (allPatterns.length === 0) return;
// Store in PostgreSQL
await createSpacePattern(spaceId, allPatterns);
logger.info(`Stored ${allPatterns.length} patterns`, {
explicit: explicitPatterns.length,
implicit: implicitPatterns.length,
});
} catch (error) {
logger.error("Error storing patterns:", error as Record<string, unknown>);
throw error;
}
}
// Helper function to trigger the task
export async function triggerSpacePattern(payload: SpacePatternPayload) {
return await spacePatternTask.trigger(payload, {
concurrencyKey: `space-pattern-${payload.spaceId}`, // Prevent parallel runs for the same space
tags: [payload.userId, payload.spaceId, payload.triggerSource || "manual"],
});
}
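A minimal usage sketch of the helper above (the calling context is hypothetical; only triggerSpacePattern and its payload fields come from this file):
// Hypothetical caller: queue pattern detection after a space's content changes.
// onSpaceContentChanged is illustrative; triggerSpacePattern is the helper exported above.
export async function onSpaceContentChanged(
  userId: string,
  workspaceId: string,
  spaceId: string,
) {
  // triggerSource is omitted, so the task falls back to "manual".
  // Runs for the same space are serialized by the concurrencyKey set in triggerSpacePattern.
  return await triggerSpacePattern({ userId, workspaceId, spaceId });
}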

View File

@ -19,24 +19,24 @@ const SearchParamsSchema = {
description:
"Search query optimized for knowledge graph retrieval. Choose the right query structure based on your search intent:\n\n" +
"1. **Entity-Centric Queries** (Best for graph search):\n" +
" - ✅ GOOD: \"User's preferences for code style and formatting\"\n" +
" - ✅ GOOD: \"Project authentication implementation decisions\"\n" +
" - ❌ BAD: \"user code style\"\n" +
' - ✅ GOOD: "User\'s preferences for code style and formatting"\n' +
' - ✅ GOOD: "Project authentication implementation decisions"\n' +
' - ❌ BAD: "user code style"\n' +
" - Format: [Person/Project] + [relationship/attribute] + [context]\n\n" +
"2. **Multi-Entity Relationship Queries** (Excellent for episode graph):\n" +
" - ✅ GOOD: \"User and team discussions about API design patterns\"\n" +
" - ✅ GOOD: \"relationship between database schema and performance optimization\"\n" +
" - ❌ BAD: \"user team api design\"\n" +
' - ✅ GOOD: "User and team discussions about API design patterns"\n' +
' - ✅ GOOD: "relationship between database schema and performance optimization"\n' +
' - ❌ BAD: "user team api design"\n' +
" - Format: [Entity1] + [relationship type] + [Entity2] + [context]\n\n" +
"3. **Semantic Question Queries** (Good for vector search):\n" +
" - ✅ GOOD: \"What causes authentication errors in production? What are the security requirements?\"\n" +
" - ✅ GOOD: \"How does caching improve API response times compared to direct database queries?\"\n" +
" - ❌ BAD: \"auth errors production\"\n" +
' - ✅ GOOD: "What causes authentication errors in production? What are the security requirements?"\n' +
' - ✅ GOOD: "How does caching improve API response times compared to direct database queries?"\n' +
' - ❌ BAD: "auth errors production"\n' +
" - Format: Complete natural questions with full context\n\n" +
"4. **Concept Exploration Queries** (Good for BFS traversal):\n" +
" - ✅ GOOD: \"concepts and ideas related to database indexing and query optimization\"\n" +
" - ✅ GOOD: \"topics connected to user authentication and session management\"\n" +
" - ❌ BAD: \"database indexing concepts\"\n" +
' - ✅ GOOD: "concepts and ideas related to database indexing and query optimization"\n' +
' - ✅ GOOD: "topics connected to user authentication and session management"\n' +
' - ❌ BAD: "database indexing concepts"\n' +
" - Format: [concept] + related/connected + [domain/context]\n\n" +
"Avoid keyword soup queries - use complete phrases with proper context for best results.",
},
@ -462,7 +462,7 @@ async function handleGetSpace(args: any) {
const spaceDetails = {
id: space.id,
name: space.name,
description: space.description,
summary: space.summary,
};
return {

View File

@ -55,7 +55,16 @@ RUN pnpm run build --filter=webapp...
# Runner
FROM ${NODE_IMAGE} AS runner
RUN apt-get update && apt-get install -y openssl netcat-openbsd ca-certificates
RUN apt-get update && apt-get install -y \
openssl \
netcat-openbsd \
ca-certificates \
python3 \
python3-pip \
python3-venv \
gcc \
g++ \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /core
RUN corepack enable
ENV NODE_ENV production
@ -69,6 +78,13 @@ COPY --from=builder --chown=node:node /core/apps/webapp/build ./apps/webapp/buil
COPY --from=builder --chown=node:node /core/apps/webapp/public ./apps/webapp/public
COPY --from=builder --chown=node:node /core/scripts ./scripts
# Install BERT Python dependencies
COPY --chown=node:node apps/webapp/app/bert/requirements.txt ./apps/webapp/app/bert/requirements.txt
RUN pip3 install --no-cache-dir -r ./apps/webapp/app/bert/requirements.txt
# Copy BERT scripts
COPY --chown=node:node apps/webapp/app/bert/main.py ./apps/webapp/app/bert/main.py
EXPOSE 3000
USER node
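The Python toolchain added to the runner stage exists so the copied BERT script can be executed at runtime. A hedged sketch of how the Node side might call it (the CLI contract of main.py, text in via argv and JSON out via stdout, is an assumption, not something this diff shows):
// Hypothetical bridge from the webapp to the bundled BERT script.
// Assumes main.py accepts the input text as an argument and prints JSON to stdout.
import { execFile } from "node:child_process";

export function runBert(text: string): Promise<unknown> {
  return new Promise((resolve, reject) => {
    execFile(
      "python3",
      ["./apps/webapp/app/bert/main.py", text],
      { maxBuffer: 10 * 1024 * 1024 }, // allow large JSON payloads
      (error, stdout) => (error ? reject(error) : resolve(JSON.parse(stdout))),
    );
  });
}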

View File

@ -0,0 +1,2 @@
-- AlterTable
ALTER TABLE "Workspace" ADD COLUMN "metadata" JSONB NOT NULL DEFAULT '{}';
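Once this column exists, the field can be read and written through Prisma as plain JSON. A minimal sketch (instantiating a client directly for self-containment; in the app you would use its shared PrismaClient, and the metadata keys are illustrative):
// Minimal sketch: merge a key into the new Workspace.metadata JSONB column.
import { PrismaClient } from "@prisma/client";

const prisma = new PrismaClient();

async function setWorkspaceMetadata(
  workspaceId: string,
  key: string,
  value: unknown,
) {
  const { metadata } = await prisma.workspace.findUniqueOrThrow({
    where: { id: workspaceId },
    select: { metadata: true },
  });
  return prisma.workspace.update({
    where: { id: workspaceId },
    data: { metadata: { ...(metadata as Record<string, unknown>), [key]: value } },
  });
}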

View File

@ -694,6 +694,8 @@ model Workspace {
slug String @unique
icon String?
metadata Json @default("{}")
integrations String[]
userId String? @unique