feat: add BullMQ space assignment, replace GDS cosine similarity, and make Trigger optional

- Implement space assignment queue and worker for BullMQ
- Add native Cypher cosine similarity function to replace GDS dependency
- Make Trigger.dev environment variables optional (only required when QUEUE_PROVIDER=trigger)
- Add EMBEDDING_MODEL_SIZE configuration for vector index management
- Update vector search queries to use native Neo4j vector indexes and their similarity scores
- Add Neo4j utility scripts for index management and troubleshooting
- Update documentation with embedding model configuration and vector index recreation steps
Author: Elias Stepanik — 2025-10-24 20:36:55 +00:00
Commit: 164e5a2f1d (parent 2030cebdc0)
22 changed files with 1094 additions and 69 deletions

View File

@@ -49,6 +49,7 @@ NEO4J_AUTH=neo4j/27192e6432564f4788d55c15131bd5ac
OLLAMA_URL=http://ollama:11434
EMBEDDING_MODEL=text-embedding-3-small
EMBEDDING_MODEL_SIZE=1536
MODEL=gpt-4.1-2025-04-14
## AWS Bedrock ##

View File

@@ -113,3 +113,25 @@ export const sessionCompactionQueue = new Queue("session-compaction-queue", {
},
},
});
/**
* Space assignment queue
* Handles assigning episodes to relevant spaces using AI
*/
export const spaceAssignmentQueue = new Queue("space-assignment-queue", {
connection: getRedisConnection(),
defaultJobOptions: {
attempts: 2, // Only retry once for space assignments
backoff: {
type: "exponential",
delay: 5000,
},
removeOnComplete: {
age: 3600,
count: 1000,
},
removeOnFail: {
age: 86400,
},
},
});
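For reference, a minimal sketch of how a producer might enqueue work onto this queue — the wrapper function below is illustrative, and the payload shape follows the `SpaceAssignmentPayload` interface introduced later in this commit:

```typescript
import { spaceAssignmentQueue } from "~/bullmq/queues";

// Hypothetical helper: enqueue a space-assignment job for freshly ingested episodes.
// The queue's defaultJobOptions above (retries, backoff, cleanup) apply automatically.
export async function queueEpisodeAssignment(
  userId: string,
  workspaceId: string,
  episodeIds: string[],
): Promise<void> {
  await spaceAssignmentQueue.add("space-assignment", {
    userId,
    workspaceId,
    mode: "episode",
    episodeIds,
  });
}
```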

View File

@@ -26,6 +26,10 @@ import {
processSessionCompaction,
type SessionCompactionPayload,
} from "~/jobs/session/session-compaction.logic";
import {
processSpaceAssignment,
type SpaceAssignmentPayload,
} from "~/jobs/spaces/space-assignment.logic";
import {
enqueueIngestEpisode,
enqueueSpaceAssignment,
@@ -166,6 +170,37 @@ sessionCompactionWorker.on("failed", (job, error) => {
logger.error(`Session compaction job ${job?.id} failed: ${error}`);
});
/**
* Space assignment worker
* Assigns episodes to relevant spaces using AI
*/
export const spaceAssignmentWorker = new Worker(
"space-assignment-queue",
async (job) => {
const payload = job.data as SpaceAssignmentPayload;
// TODO: Add enqueue callbacks for space summary and space pattern
// For now, these are optional and space assignment will work without them
return await processSpaceAssignment(
payload,
undefined, // enqueueSpaceSummary - to be implemented
undefined, // enqueueSpacePattern - to be implemented
);
},
{
connection: getRedisConnection(),
concurrency: 2, // Process up to 2 space assignments in parallel
},
);
spaceAssignmentWorker.on("completed", (job) => {
logger.log(`Space assignment job ${job.id} completed`);
});
spaceAssignmentWorker.on("failed", (job, error) => {
logger.error(`Space assignment job ${job?.id} failed: ${error}`);
});
/**
 * Graceful shutdown handler
 */
@@ -176,6 +211,7 @@ export async function closeAllWorkers(): Promise<void> {
conversationTitleWorker.close(),
deepSearchWorker.close(),
sessionCompactionWorker.close(),
spaceAssignmentWorker.close(),
]);
logger.log("All BullMQ workers closed");
}

View File

@@ -79,9 +79,9 @@ const EnvironmentSchema = z.object({
SMTP_PASSWORD: z.string().optional(),
//Trigger
-TRIGGER_PROJECT_ID: z.string(),
-TRIGGER_SECRET_KEY: z.string(),
-TRIGGER_API_URL: z.string(),
+TRIGGER_PROJECT_ID: z.string().optional(),
+TRIGGER_SECRET_KEY: z.string().optional(),
+TRIGGER_API_URL: z.string().optional(),
TRIGGER_DB: z.string().default("trigger"),
// Model envs
@@ -97,7 +97,7 @@ const EnvironmentSchema = z.object({
AWS_REGION: z.string().optional(),
// Queue provider
-QUEUE_PROVIDER: z.enum(["trigger", "bullmq"]).default("trigger"),
+QUEUE_PROVIDER: z.enum(["trigger", "bullmq"]).default("bullmq"),
});
export type Environment = z.infer<typeof EnvironmentSchema>;

View File

@@ -0,0 +1,603 @@
import { z } from "zod";
import { logger } from "~/services/logger.service";
import { SpaceService } from "~/services/space.server";
import { makeModelCall } from "~/lib/model.server";
import { createBatch, getBatch } from "~/lib/batch.server";
import { runQuery } from "~/lib/neo4j.server";
import {
assignEpisodesToSpace,
getSpaceEpisodeCount,
} from "~/services/graphModels/space";
import {
updateMultipleSpaceStatuses,
SPACE_STATUS,
} from "~/trigger/utils/space-status";
import type { CoreMessage } from "ai";
import type { Space } from "@prisma/client";
export interface SpaceAssignmentPayload {
userId: string;
workspaceId: string;
mode: "new_space" | "episode";
newSpaceId?: string; // For new_space mode
episodeIds?: string[]; // For episode mode
batchSize?: number; // Processing batch size
}
interface EpisodeData {
uuid: string;
content: string;
originalContent: string;
source: string;
createdAt: Date;
metadata: any;
}
interface SpaceData {
uuid: string;
name: string;
description?: string;
episodeCount: number;
}
interface AssignmentResult {
episodeId: string;
spaceIds: string[];
confidence: number;
reasoning?: string;
}
const CONFIG = {
newSpaceMode: {
batchSize: 20,
confidenceThreshold: 0.75,
useBatchAPI: true,
minEpisodesForBatch: 5,
},
episodeMode: {
batchSize: 20,
confidenceThreshold: 0.75,
useBatchAPI: true,
minEpisodesForBatch: 5,
},
};
// Zod schema for LLM response validation
const AssignmentResultSchema = z.array(
z.object({
episodeId: z.string(),
addSpaceId: z.array(z.string()),
confidence: z.number(),
reasoning: z.string(),
}),
);
/**
* Core business logic for space assignment
* This is shared between Trigger.dev and BullMQ implementations
*/
export async function processSpaceAssignment(
payload: SpaceAssignmentPayload,
// Callback functions for enqueueing follow-up jobs
enqueueSpaceSummary?: (params: {
userId: string;
workspaceId: string;
spaceId: string;
triggerSource: string;
}) => Promise<any>,
enqueueSpacePattern?: (params: {
userId: string;
workspaceId: string;
spaceId: string;
}) => Promise<any>,
): Promise<{
success: boolean;
mode: string;
processed: number;
assignments: number;
batches?: number;
spacesAvailable: number;
affectedSpaces: number;
}> {
const {
userId,
workspaceId,
mode,
newSpaceId,
episodeIds,
batchSize = mode === "new_space"
? CONFIG.newSpaceMode.batchSize
: CONFIG.episodeMode.batchSize,
} = payload;
logger.info(`Starting space assignment`, {
userId,
mode,
newSpaceId,
episodeIds,
batchSize,
});
const spaceService = new SpaceService();
try {
// 1. Get user's spaces
const spaces = await spaceService.getUserSpaces(userId);
if (spaces.length === 0) {
logger.info(`No spaces found for user ${userId}, skipping assignment`);
return {
success: true,
mode,
processed: 0,
assignments: 0,
spacesAvailable: 0,
affectedSpaces: 0,
};
}
// 2. Get episodes to analyze based on mode
const episodes = await getEpisodesToAnalyze(userId, mode, {
newSpaceId,
episodeIds,
});
if (episodes.length === 0) {
logger.info(
`No episodes to analyze for user ${userId} in ${mode} mode`,
);
return {
success: true,
mode,
processed: 0,
assignments: 0,
spacesAvailable: spaces.length,
affectedSpaces: 0,
};
}
// 3. Process episodes using batch AI or fallback to sequential
const config =
mode === "new_space" ? CONFIG.newSpaceMode : CONFIG.episodeMode;
const shouldUseBatchAPI = true;
let totalProcessed = 0;
let totalAssignments = 0;
let totalBatches = 0;
const affectedSpaces = new Set<string>();
if (shouldUseBatchAPI) {
logger.info(
`Using Batch AI processing for ${episodes.length} episodes`,
{
mode,
userId,
batchSize,
},
);
const batchResult = await processBatchAI(
episodes,
spaces,
userId,
mode,
newSpaceId,
batchSize,
);
totalProcessed = batchResult.processed;
totalAssignments = batchResult.assignments;
batchResult.affectedSpaces?.forEach((spaceId) =>
affectedSpaces.add(spaceId),
);
} else {
logger.info(
`Using sequential processing for ${episodes.length} episodes (below batch threshold)`,
{
mode,
userId,
minRequired: config.minEpisodesForBatch,
},
);
totalBatches = Math.ceil(episodes.length / batchSize);
for (let i = 0; i < totalBatches; i++) {
const batch = episodes.slice(i * batchSize, (i + 1) * batchSize);
logger.info(
`Processing batch ${i + 1}/${totalBatches} with ${batch.length} episodes`,
{
mode,
userId,
},
);
const batchResult = await processBatch(
batch,
spaces,
userId,
mode,
newSpaceId,
);
totalProcessed += batchResult.processed;
totalAssignments += batchResult.assignments;
batchResult.affectedSpaces?.forEach((spaceId) =>
affectedSpaces.add(spaceId),
);
// Add delay between batches to avoid rate limiting
if (i < totalBatches - 1) {
await new Promise((resolve) => setTimeout(resolve, 1000));
}
}
}
logger.info(`Completed LLM space assignment`, {
userId,
mode,
totalProcessed,
totalAssignments,
spacesAvailable: spaces.length,
affectedSpaces: affectedSpaces.size,
});
// 4. Update space status to "processing" for affected spaces
if (affectedSpaces.size > 0) {
try {
await updateMultipleSpaceStatuses(
Array.from(affectedSpaces),
SPACE_STATUS.PROCESSING,
{
userId,
operation: "space-assignment",
metadata: { mode, phase: "start_processing" },
},
);
} catch (statusError) {
logger.warn(`Failed to update space statuses to processing:`, {
error: statusError,
userId,
mode,
});
}
}
// 5. Trigger space summaries for affected spaces (if callback provided)
if (affectedSpaces.size > 0 && enqueueSpaceSummary) {
try {
logger.info(
`Triggering space summaries for ${affectedSpaces.size} affected spaces in parallel`,
);
const summaryPromises = Array.from(affectedSpaces).map((spaceId) =>
enqueueSpaceSummary({
userId,
workspaceId,
spaceId,
triggerSource: "assignment",
}).catch((error) => {
logger.warn(`Failed to trigger summary for space ${spaceId}:`, {
error,
});
return { success: false, spaceId, error: error.message };
}),
);
const summaryResults = await Promise.allSettled(summaryPromises);
const successful = summaryResults.filter(
(r) => r.status === "fulfilled",
).length;
const failed = summaryResults.filter(
(r) => r.status === "rejected",
).length;
logger.info(`Space summary triggers completed`, {
userId,
mode,
totalSpaces: affectedSpaces.size,
successful,
failed,
});
} catch (summaryError) {
logger.warn(`Failed to trigger space summaries after assignment:`, {
error: summaryError,
userId,
mode,
affectedSpaces: Array.from(affectedSpaces),
});
}
}
// 6. Update space status to "ready" after all processing is complete
if (affectedSpaces.size > 0) {
try {
await updateMultipleSpaceStatuses(
Array.from(affectedSpaces),
SPACE_STATUS.READY,
{
userId,
operation: "space-assignment",
metadata: { mode, phase: "completed_processing" },
},
);
} catch (finalStatusError) {
logger.warn(`Failed to update space statuses to ready:`, {
error: finalStatusError,
userId,
mode,
});
}
}
return {
success: true,
mode,
processed: totalProcessed,
assignments: totalAssignments,
batches: totalBatches,
spacesAvailable: spaces.length,
affectedSpaces: affectedSpaces.size,
};
} catch (error) {
logger.error(
`Error in LLM space assignment for user ${userId}:`,
error as Record<string, unknown>,
);
throw error;
}
}
async function getEpisodesToAnalyze(
userId: string,
mode: "new_space" | "episode",
options: { newSpaceId?: string; episodeIds?: string[] },
): Promise<EpisodeData[]> {
let query: string;
let params: any = { userId };
if (mode === "new_space") {
query = `
MATCH (e:Episode {userId: $userId})
WHERE e.validAt IS NOT NULL
RETURN e.uuid as uuid, e.content as content, e.originalContent as originalContent,
e.source as source, e.createdAt as createdAt, e.metadata as metadata
ORDER BY e.validAt DESC
LIMIT 100
`;
} else {
// episode mode: analyze specific episodes
if (!options.episodeIds || options.episodeIds.length === 0) {
return [];
}
query = `
MATCH (e:Episode {userId: $userId})
WHERE e.uuid IN $episodeIds AND e.validAt IS NOT NULL
RETURN e.uuid as uuid, e.content as content, e.originalContent as originalContent,
e.source as source, e.createdAt as createdAt, e.metadata as metadata
`;
params.episodeIds = options.episodeIds;
}
const result = await runQuery(query, params);
return result.records.map((record) => ({
uuid: record.get("uuid"),
content: record.get("content"),
originalContent: record.get("originalContent"),
source: record.get("source"),
createdAt: record.get("createdAt"),
metadata: record.get("metadata"),
}));
}
async function processBatchAI(
episodes: EpisodeData[],
spaces: Space[],
userId: string,
mode: string,
newSpaceId?: string,
batchSize = 20,
): Promise<{
processed: number;
assignments: number;
affectedSpaces?: string[];
}> {
const spaceData = await Promise.all(
spaces.map(async (space) => ({
uuid: space.uuid,
name: space.name,
description: space.description || "",
episodeCount: await getSpaceEpisodeCount(space.uuid),
})),
);
// Create batch request
const customId = `batch-${userId}-${Date.now()}`;
const request = {
custom_id: customId,
method: "POST",
url: "/v1/chat/completions",
body: {
model: "gpt-4o-mini-2024-07-18",
messages: generateAssignmentPrompt(episodes, spaceData, mode, newSpaceId),
response_format: { type: "json_object" },
},
};
// Submit batch
const batch = await createBatch([request]);
logger.info(`Batch created: ${batch.id}`);
// Poll for completion
let batchResult = await getBatch(batch.id);
while (batchResult.status === "in_progress" || batchResult.status === "validating") {
await new Promise((resolve) => setTimeout(resolve, 5000));
batchResult = await getBatch(batch.id);
}
if (batchResult.status !== "completed") {
throw new Error(`Batch processing failed: ${batchResult.status}`);
}
// Parse results
const results = batchResult.output || [];
let totalAssignments = 0;
const affectedSpaces = new Set<string>();
for (const result of results) {
const response = result.response?.body?.choices?.[0]?.message?.content;
if (!response) continue;
try {
const parsed = JSON.parse(response);
const assignments = AssignmentResultSchema.parse(parsed.assignments);
for (const assignment of assignments) {
if (assignment.addSpaceId && assignment.addSpaceId.length > 0) {
await assignEpisodesToSpace(
userId,
assignment.episodeId,
assignment.addSpaceId,
);
totalAssignments++;
assignment.addSpaceId.forEach((spaceId) => affectedSpaces.add(spaceId));
}
}
} catch (parseError) {
logger.warn("Failed to parse batch result:", parseError);
}
}
return {
processed: episodes.length,
assignments: totalAssignments,
affectedSpaces: Array.from(affectedSpaces),
};
}
async function processBatch(
episodes: EpisodeData[],
spaces: Space[],
userId: string,
mode: string,
newSpaceId?: string,
): Promise<{
processed: number;
assignments: number;
affectedSpaces?: string[];
}> {
const spaceData = await Promise.all(
spaces.map(async (space) => ({
uuid: space.uuid,
name: space.name,
description: space.description || "",
episodeCount: await getSpaceEpisodeCount(space.uuid),
})),
);
const messages = generateAssignmentPrompt(episodes, spaceData, mode, newSpaceId);
const response = await makeModelCall({
messages,
mode: "json",
complexity: "high",
});
let assignments: AssignmentResult[] = [];
try {
const parsed = JSON.parse(response.text);
const validated = AssignmentResultSchema.parse(parsed.assignments);
assignments = validated.map((a) => ({
episodeId: a.episodeId,
spaceIds: a.addSpaceId,
confidence: a.confidence,
reasoning: a.reasoning,
}));
} catch (parseError) {
logger.warn("Failed to parse LLM response:", parseError);
return { processed: 0, assignments: 0 };
}
const affectedSpaces = new Set<string>();
let totalAssignments = 0;
for (const assignment of assignments) {
if (assignment.spaceIds && assignment.spaceIds.length > 0) {
await assignEpisodesToSpace(
userId,
assignment.episodeId,
assignment.spaceIds,
);
totalAssignments++;
assignment.spaceIds.forEach((spaceId) => affectedSpaces.add(spaceId));
}
}
return {
processed: episodes.length,
assignments: totalAssignments,
affectedSpaces: Array.from(affectedSpaces),
};
}
function generateAssignmentPrompt(
episodes: EpisodeData[],
spaces: SpaceData[],
mode: string,
newSpaceId?: string,
): CoreMessage[] {
const systemPrompt = `You are a knowledge organization assistant that assigns episodes (memories/experiences) to relevant topical spaces.
Your task is to analyze each episode and determine which existing spaces it belongs to based on:
1. Topic relevance - Does the episode discuss topics related to the space?
2. Context alignment - Does it fit the overall theme and context of the space?
3. Information value - Does it add meaningful information to the space?
Guidelines:
- An episode can belong to multiple spaces if it's relevant to multiple topics
- Only assign to spaces where the episode provides meaningful context
- Be selective - not every episode needs to be assigned to every space
- Consider the space's existing content (episodeCount) when making decisions
Return your assignments as a JSON object with this structure:
{
"assignments": [
{
"episodeId": "episode-uuid",
"addSpaceId": ["space-uuid-1", "space-uuid-2"],
"confidence": 0.85,
"reasoning": "Brief explanation of why this assignment makes sense"
}
]
}`;
const episodesText = episodes
.map(
(ep, i) =>
`Episode ${i + 1} (ID: ${ep.uuid}):
Source: ${ep.source}
Content: ${ep.content.slice(0, 500)}${ep.content.length > 500 ? "..." : ""}
`,
)
.join("\n\n");
const spacesText = spaces
.map(
(s) =>
`Space: ${s.name} (ID: ${s.uuid})
Description: ${s.description || "No description"}
Current episodes: ${s.episodeCount}`,
)
.join("\n\n");
const userPrompt = `Available Spaces:
${spacesText}
Episodes to Assign:
${episodesText}
Analyze each episode and return your assignment decisions.`;
return [
{ role: "system", content: systemPrompt },
{ role: "user", content: userPrompt },
];
}

View File

@@ -415,6 +415,26 @@ const initializeSchema = async () => {
}
};
/**
* Generate Cypher code for calculating cosine similarity between two vectors
* This is a workaround for not having the GDS library installed
*
* @param vec1Name - Name of the first vector property (e.g., 'entity.nameEmbedding')
* @param vec2Name - Name of the second vector parameter (e.g., '$queryEmbedding')
* @returns Cypher expression that calculates cosine similarity
*/
export function cosineSimilarityCypher(vec1Name: string, vec2Name: string): string {
return `
CASE
WHEN size(${vec1Name}) > 0 AND size(${vec2Name}) > 0 THEN
reduce(dot = 0.0, i IN range(0, size(${vec1Name})-1) | dot + ${vec1Name}[i] * ${vec2Name}[i]) /
(sqrt(reduce(sum = 0.0, i IN range(0, size(${vec1Name})-1) | sum + ${vec1Name}[i] * ${vec1Name}[i])) *
sqrt(reduce(sum = 0.0, i IN range(0, size(${vec2Name})-1) | sum + ${vec2Name}[i] * ${vec2Name}[i])))
ELSE 0.0
END
`.trim();
}
// Close the driver when the application shuts down
const closeDriver = async () => {
await driver.close();
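As a rough illustration of how the helper is interpolated into a query string (the `Note` label and `embedding` property here are invented for the example; the real call sites appear in the graph-model changes below):

```typescript
import { runQuery, cosineSimilarityCypher } from "~/lib/neo4j.server";

// Hypothetical example: rank Note nodes against a query embedding without the GDS plugin.
async function findSimilarNotes(userId: string, queryEmbedding: number[]) {
  const query = `
    MATCH (n:Note {userId: $userId})
    WHERE n.embedding IS NOT NULL
    WITH n, ${cosineSimilarityCypher("n.embedding", "$queryEmbedding")} AS score
    WHERE score >= 0.7
    RETURN n, score
    ORDER BY score DESC
    LIMIT 10
  `;
  return runQuery(query, { userId, queryEmbedding });
}
```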

View File

@@ -178,8 +178,9 @@ export async function enqueueSessionCompaction(
export async function enqueueSpaceAssignment(payload: {
userId: string;
workspaceId: string;
-mode: "episode";
-episodeIds: string[];
+mode: "episode" | "new_space";
+episodeIds?: string[];
+newSpaceId?: string;
}): Promise<void> {
const provider = env.QUEUE_PROVIDER as QueueProvider;
@@ -189,8 +190,11 @@ export async function enqueueSpaceAssignment(payload: {
);
await triggerSpaceAssignment(payload);
} else {
-// For BullMQ, space assignment is not implemented yet
-// You can add it later when needed
-console.warn("Space assignment not implemented for BullMQ yet");
+// BullMQ
+const { spaceAssignmentQueue } = await import("~/bullmq/queues");
+await spaceAssignmentQueue.add("space-assignment", payload, {
+attempts: 2,
+backoff: { type: "exponential", delay: 5000 },
+});
}
}

View File

@@ -48,7 +48,7 @@ export async function loader({ params, request }: LoaderFunctionArgs) {
const run = await getCurrentConversationRun(conversation.id, workspace.id);
-return { conversation, run, apiURL: env.TRIGGER_API_URL };
+return { conversation, run, apiURL: env.TRIGGER_API_URL ?? undefined };
}
// Example action accessing params

View File

@@ -1,4 +1,4 @@
-import { runQuery } from "~/lib/neo4j.server";
+import { runQuery, cosineSimilarityCypher } from "~/lib/neo4j.server";
export interface SessionEpisodeData {
uuid: string;
@@ -169,7 +169,7 @@ export async function searchCompactedSessionsByEmbedding(
MATCH (cs:CompactedSession {userId: $userId})
WHERE cs.summaryEmbedding IS NOT NULL
WITH cs,
-gds.similarity.cosine(cs.summaryEmbedding, $embedding) AS score
+${cosineSimilarityCypher('cs.summaryEmbedding', '$embedding')} AS score
WHERE score >= $minScore
RETURN cs, score
ORDER BY score DESC

View File

@@ -79,10 +79,8 @@ export async function findSimilarEntities(params: {
const limit = params.limit || 5;
const query = `
CALL db.index.vector.queryNodes('entity_embedding', ${limit*2}, $queryEmbedding)
-YIELD node AS entity
-WHERE entity.userId = $userId
-WITH entity, gds.similarity.cosine(entity.nameEmbedding, $queryEmbedding) AS score
-WHERE score >= $threshold
+YIELD node AS entity, score
+WHERE entity.userId = $userId AND score >= $threshold
RETURN entity, score
ORDER BY score DESC
LIMIT ${limit}
@@ -116,11 +114,10 @@ export async function findSimilarEntitiesWithSameType(params: {
const limit = params.limit || 5;
const query = `
CALL db.index.vector.queryNodes('entity_embedding', ${limit*2}, $queryEmbedding)
-YIELD node AS entity
+YIELD node AS entity, score
WHERE entity.userId = $userId
AND entity.type = $entityType
-WITH entity, gds.similarity.cosine(entity.nameEmbedding, $queryEmbedding) AS score
-WHERE score >= $threshold
+AND score >= $threshold
RETURN entity, score
ORDER BY score DESC
LIMIT ${limit}

View File

@@ -147,10 +147,8 @@ export async function searchEpisodesByEmbedding(params: {
const limit = params.limit || 100;
const query = `
CALL db.index.vector.queryNodes('episode_embedding', ${limit * 2}, $embedding)
-YIELD node AS episode
-WHERE episode.userId = $userId
-WITH episode, gds.similarity.cosine(episode.contentEmbedding, $embedding) AS score
-WHERE score >= $minSimilarity
+YIELD node AS episode, score
+WHERE episode.userId = $userId AND score >= $minSimilarity
RETURN episode, score
ORDER BY score DESC
LIMIT ${limit}`;
@@ -292,10 +290,8 @@ export async function getRelatedEpisodesEntities(params: {
const limit = params.limit || 100;
const query = `
CALL db.index.vector.queryNodes('episode_embedding', ${limit * 2}, $embedding)
-YIELD node AS episode
-WHERE episode.userId = $userId
-WITH episode, gds.similarity.cosine(episode.contentEmbedding, $embedding) AS score
-WHERE score >= $minSimilarity
+YIELD node AS episode, score
+WHERE episode.userId = $userId AND score >= $minSimilarity
OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity)
WHERE entity IS NOT NULL
RETURN DISTINCT entity

View File

@@ -214,12 +214,11 @@ export async function findSimilarStatements({
const limit = 100;
const query = `
CALL db.index.vector.queryNodes('statement_embedding', ${limit*2}, $factEmbedding)
-YIELD node AS statement
+YIELD node AS statement, score
WHERE statement.userId = $userId
AND statement.invalidAt IS NULL
+AND score >= $threshold
${excludeIds.length > 0 ? "AND NOT statement.uuid IN $excludeIds" : ""}
-WITH statement, gds.similarity.cosine(statement.factEmbedding, $factEmbedding) AS score
-WHERE score >= $threshold
RETURN statement, score
ORDER BY score DESC
LIMIT ${limit}
@@ -415,11 +414,10 @@ export async function searchStatementsByEmbedding(params: {
const limit = params.limit || 100;
const query = `
CALL db.index.vector.queryNodes('statement_embedding', ${limit*2}, $embedding)
-YIELD node AS statement
+YIELD node AS statement, score
WHERE statement.userId = $userId
AND statement.invalidAt IS NULL
-WITH statement, gds.similarity.cosine(statement.factEmbedding, $embedding) AS score
-WHERE score >= $minSimilarity
+AND score >= $minSimilarity
RETURN statement, score
ORDER BY score DESC
LIMIT ${limit}

View File

@@ -2,7 +2,7 @@ import type { EntityNode, StatementNode, EpisodicNode } from "@core/types";
import type { SearchOptions } from "../search.server";
import type { Embedding } from "ai";
import { logger } from "../logger.service";
-import { runQuery } from "~/lib/neo4j.server";
+import { runQuery, cosineSimilarityCypher } from "~/lib/neo4j.server";
import { getEmbedding } from "~/lib/model.server";
import { findSimilarEntities } from "../graphModels/entity";
@@ -132,13 +132,13 @@ export async function performVectorSearch(
}
const limit = options.limit || 100;
-// 1. Search for similar statements using GDS cosine similarity with provenance count
+// 1. Search for similar statements using native Cypher cosine similarity with provenance count
const cypher = `
MATCH (s:Statement)
WHERE s.userId = $userId
${timeframeCondition}
${spaceCondition}
-WITH s, gds.similarity.cosine(s.factEmbedding, $embedding) AS score
+WITH s, ${cosineSimilarityCypher('s.factEmbedding', '$embedding')} AS score
WHERE score >= 0.5
OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s)
WITH s, score, count(episode) as provenanceCount
@@ -252,7 +252,7 @@ async function bfsTraversal(
WHERE e.uuid IN $entityIds
${timeframeCondition}
WITH DISTINCT s // Deduplicate first
-WITH s, gds.similarity.cosine(s.factEmbedding, $queryEmbedding) AS relevance
+WITH s, ${cosineSimilarityCypher('s.factEmbedding', '$queryEmbedding')} AS relevance
WHERE relevance >= $explorationThreshold
RETURN s.uuid AS uuid, relevance
ORDER BY relevance DESC

View File

@@ -16,32 +16,31 @@ export async function initializeStartupServices() {
return;
}
-// Wait for TRIGGER_API_URL/login to be available, up to 1 minute
-async function waitForTriggerLogin(
-url: string,
-timeoutMs = 60000,
-intervalMs = 2000,
-) {
-const start = Date.now();
-while (Date.now() - start < timeoutMs) {
-try {
-const res = await fetch(`${url}/login`, { method: "GET" });
-if (res.ok) {
-return;
-}
-} catch (e) {
-// ignore, will retry
-}
-await new Promise((resolve) => setTimeout(resolve, intervalMs));
-}
-// If we get here, the service is still not available
-console.error(
-`${url}/login is not available after ${timeoutMs / 1000} seconds. Exiting process.`,
-);
-process.exit(1);
-}
if (env.QUEUE_PROVIDER === "trigger") {
+async function waitForTriggerLogin(
+url: string,
+timeoutMs = 60000,
+intervalMs = 2000,
+) {
+const start = Date.now();
+while (Date.now() - start < timeoutMs) {
+try {
+const res = await fetch(`${url}/login`, { method: "GET" });
+if (res.ok) {
+return;
+}
+} catch (e) {
+// ignore, will retry
+}
+await new Promise((resolve) => setTimeout(resolve, intervalMs));
+}
+// If we get here, the service is still not available
+console.error(
+`${url}/login is not available after ${timeoutMs / 1000} seconds. Exiting process.`,
+);
+process.exit(1);
+}
try {
const triggerApiUrl = env.TRIGGER_API_URL;
if (triggerApiUrl) {
@@ -143,6 +142,10 @@ export async function addEnvVariablesInTrigger() {
OPENAI_API_KEY: OPENAI_API_KEY ?? "",
};
if (!TRIGGER_API_URL || !TRIGGER_PROJECT_ID || !TRIGGER_SECRET_KEY) {
throw new Error("Trigger environment variables are not set");
}
const envName = env.NODE_ENV === "production" ? "prod" : "dev";
const apiBase = `${TRIGGER_API_URL}/api/v1`;
const envVarsUrl = `${apiBase}/projects/${TRIGGER_PROJECT_ID}/envvars/${envName}`;
@@ -237,3 +240,4 @@ export async function addEnvVariablesInTrigger() {
throw err;
}
}

View File

@@ -47,11 +47,13 @@ Environment variables for the CORE webapp container.
| `OPENAI_API_KEY` | No | — | OpenAI API key for memory processing |
| `MODEL` | No | gpt-4-turbo-2024-04-09 | Default language model |
| `EMBEDDING_MODEL` | No | text-embedding-3-small | Model for text embeddings |
| `EMBEDDING_MODEL_SIZE` | No | 1024 | Embedding vector dimensions (1536 for text-embedding-3-small, 1024 for mxbai-embed-large) |
| `OLLAMA_URL` | No | http://ollama:11434 | Ollama server URL for local models |
| **Background Jobs - Trigger.dev** | | | |
-| `TRIGGER_PROJECT_ID` | Yes | — | Trigger.dev project identifier |
-| `TRIGGER_SECRET_KEY` | Yes | — | Trigger.dev authentication secret |
-| `TRIGGER_API_URL` | Yes | http://host.docker.internal:8030 | Trigger.dev API endpoint (use localhost:8030 for local, api.trigger.dev for cloud) |
+| `QUEUE_PROVIDER` | No | bullmq | Queue provider (bullmq or trigger) |
+| `TRIGGER_PROJECT_ID` | No* | — | Trigger.dev project identifier (*required if QUEUE_PROVIDER is trigger) |
+| `TRIGGER_SECRET_KEY` | No* | — | Trigger.dev authentication secret (*required if QUEUE_PROVIDER is trigger) |
+| `TRIGGER_API_URL` | No* | http://host.docker.internal:8030 | Trigger.dev API endpoint (*required if QUEUE_PROVIDER is trigger) |
## Security Considerations
@@ -85,4 +87,34 @@ When using Docker Compose, service names are used as hostnames:
- `redis` for Redis
- `ollama` for Ollama (if using local models)
For external services (like Trigger.dev), use `host.docker.internal` to access services running on the host machine.
## Neo4j Vector Index Configuration
The `EMBEDDING_MODEL_SIZE` must match the dimension output of your embedding model:
- **text-embedding-3-small** (OpenAI): 1536 dimensions
- **text-embedding-3-large** (OpenAI): 3072 dimensions
- **mxbai-embed-large** (Ollama): 1024 dimensions
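For illustration, the startup index creation presumably reads this value from the environment along the following lines — a sketch only, since the actual `initializeSchema` body is not part of this diff; the index and property names are taken from `scripts/create-vector-indexes.cypher`:

```typescript
import { runQuery } from "~/lib/neo4j.server";

// Sketch: create one vector index sized to the configured embedding model.
async function ensureEntityEmbeddingIndex(): Promise<void> {
  const dimensions = Number(process.env.EMBEDDING_MODEL_SIZE ?? 1536);
  await runQuery(
    `CREATE VECTOR INDEX entity_embedding IF NOT EXISTS FOR (n:Entity) ON n.nameEmbedding
     OPTIONS {indexConfig: {\`vector.dimensions\`: ${dimensions}, \`vector.similarity_function\`: 'cosine'}}`,
    {},
  );
}
```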
### Recreating Vector Indexes
If you change embedding models or get dimension mismatch errors, you need to recreate the Neo4j vector indexes:
```bash
# Connect to Neo4j Browser at http://localhost:7474
# Or use cypher-shell in the Neo4j container
# Drop existing vector indexes
DROP INDEX entity_embedding IF EXISTS;
DROP INDEX statement_embedding IF EXISTS;
DROP INDEX episode_embedding IF EXISTS;
# Restart your application - indexes will be recreated with correct dimensions
# The application automatically creates indexes on startup
```
**Important:** Changing `EMBEDDING_MODEL_SIZE` after data exists requires:
1. Dropping the old vector indexes
2. Re-embedding all existing entities, statements, and episodes
3. The application will recreate indexes on next startup

View File

@@ -32,10 +32,8 @@ services:
  - ENABLE_EMAIL_LOGIN=${ENABLE_EMAIL_LOGIN}
  - OLLAMA_URL=${OLLAMA_URL}
  - EMBEDDING_MODEL=${EMBEDDING_MODEL}
+ - EMBEDDING_MODEL_SIZE=${EMBEDDING_MODEL_SIZE}
  - MODEL=${MODEL}
- - TRIGGER_PROJECT_ID=${TRIGGER_PROJECT_ID}
- - TRIGGER_SECRET_KEY=${TRIGGER_SECRET_KEY}
- - TRIGGER_API_URL=${API_ORIGIN}
  - POSTGRES_DB=${POSTGRES_DB}
  - EMAIL_TRANSPORT=${EMAIL_TRANSPORT}
  - REPLY_TO_EMAIL=${REPLY_TO_EMAIL}

scripts/README.md (new file, 141 lines)
View File

@@ -0,0 +1,141 @@
# Neo4j Index Management Scripts
Scripts to manage and fix Neo4j vector indexes for the CORE application.
## Problem
If you're seeing errors like:
```
Failed to invoke procedure `db.index.vector.queryNodes`:
Caused by: java.lang.IllegalArgumentException: There is no such vector schema index: entity_embedding
```
Or:
```
Index query vector has 1536 dimensions, but indexed vectors have 1024.
```
These scripts will help you fix the issue.
## Prerequisites
Before running these scripts, ensure:
1. `EMBEDDING_MODEL_SIZE` is set correctly in your `.env` file (1536 for text-embedding-3-small)
2. Neo4j is running and accessible
3. You have the Neo4j credentials
## Option 1: Quick Fix (Recommended)
### Using Docker
If you're running Neo4j in Docker:
```bash
# Set your Neo4j password
export NEO4J_PASSWORD="your-neo4j-password"
# Run the fix script inside the Neo4j container
docker exec -it <neo4j-container-name> \
bash -c "NEO4J_PASSWORD=$NEO4J_PASSWORD /path/to/fix-neo4j-indexes.sh"
```
Or copy the script into the container:
```bash
# From the core directory
docker cp scripts/fix-neo4j-indexes.sh <neo4j-container-name>:/tmp/
docker exec -it <neo4j-container-name> bash
# Inside the container:
export NEO4J_PASSWORD="your-password"
bash /tmp/fix-neo4j-indexes.sh
```
### Using cypher-shell locally
If you have cypher-shell installed:
```bash
export NEO4J_URI="bolt://localhost:7687"
export NEO4J_USERNAME="neo4j"
export NEO4J_PASSWORD="your-password"
export EMBEDDING_MODEL_SIZE="1536" # or 1024 for mxbai-embed-large
./scripts/fix-neo4j-indexes.sh
```
## Option 2: Manual Fix via Neo4j Browser
1. Open Neo4j Browser at http://localhost:7474
2. Login with your Neo4j credentials
3. Copy and paste the contents of `scripts/create-vector-indexes.cypher`
4. Execute the queries
**Note:** If using a different embedding model, update the `vector.dimensions` value in the script:
- `text-embedding-3-small`: 1536
- `text-embedding-3-large`: 3072
- `mxbai-embed-large`: 1024
## Checking Index Status
To verify your indexes are correct:
### Via Neo4j Browser
```cypher
SHOW INDEXES YIELD name, type, labelsOrTypes, properties
WHERE type = 'VECTOR'
RETURN name, type, labelsOrTypes, properties;
```
### Via cypher-shell
```bash
cypher-shell -u neo4j -p your-password -a bolt://localhost:7687 \
"SHOW INDEXES YIELD name, type WHERE type = 'VECTOR' RETURN name, type;"
```
## After Fixing Indexes
1. **Restart your application** to ensure it picks up the new indexes
2. **Clear old data** (optional but recommended):
- If you had existing entities/episodes with embeddings in the wrong dimension, they won't work with the new indexes
- Either delete and re-ingest your data, or run a migration to re-embed existing data
## Troubleshooting
### "cypher-shell not found"
Install Neo4j shell tools or use Neo4j Browser instead.
### "Connection refused"
- Check if Neo4j is running: `docker ps | grep neo4j`
- Verify the Neo4j URI is correct
- Check if Neo4j port (7687) is accessible
### "Authentication failed"
- Verify your Neo4j password matches your configuration
- Check the NEO4J_PASSWORD environment variable
### Indexes created but still getting errors
- Make sure `EMBEDDING_MODEL_SIZE=1536` is in your `.env` file
- Restart the application completely: `docker-compose restart webapp`
- Check application logs for startup errors
## Understanding the Scripts
### check-neo4j-indexes.cypher
Read-only queries to check index status. Safe to run anytime.
### create-vector-indexes.cypher
Drops and recreates vector indexes with 1536 dimensions. Edit the dimension value if using a different embedding model.
### fix-neo4j-indexes.sh
Automated bash script that:
1. Checks current indexes
2. Drops existing vector indexes
3. Creates new indexes with correct dimensions
4. Verifies the fix

View File

@@ -0,0 +1,33 @@
// Check if there's any data in Neo4j for debugging
// 1. Count all nodes by type
MATCH (n)
WHERE n.userId IS NOT NULL
RETURN labels(n)[0] AS nodeType, count(n) AS count
ORDER BY count DESC;
// 2. Check recent episodes
MATCH (e:Episode)
WHERE e.userId IS NOT NULL
RETURN e.uuid, e.content, e.createdAt, e.spaceIds, e.validAt
ORDER BY e.createdAt DESC
LIMIT 5;
// 3. Check if episodes have statements
MATCH (e:Episode)-[:HAS_PROVENANCE]->(s:Statement)
WHERE e.userId IS NOT NULL
RETURN count(e) AS episodesWithStatements;
// 4. Check entities
MATCH (ent:Entity)
WHERE ent.userId IS NOT NULL
RETURN ent.uuid, ent.name, ent.type
LIMIT 10;
// 5. Check for complete triplets (Episode -> Statement -> Subject/Predicate/Object)
MATCH (e:Episode)-[:HAS_PROVENANCE]->(s:Statement)
MATCH (s)-[:HAS_SUBJECT]->(subj:Entity)
MATCH (s)-[:HAS_PREDICATE]->(pred:Entity)
MATCH (s)-[:HAS_OBJECT]->(obj:Entity)
WHERE e.userId IS NOT NULL
RETURN count(*) AS completeTriplets;

View File

@@ -0,0 +1,19 @@
// Check vector index configuration including dimensions
SHOW INDEXES
YIELD name, type, labelsOrTypes, properties, options
WHERE type = 'VECTOR'
RETURN
name,
type,
labelsOrTypes,
properties,
options.indexConfig AS indexConfig;
// Specifically check the dimensions
SHOW INDEXES
YIELD name, options
WHERE name IN ['entity_embedding', 'statement_embedding', 'episode_embedding']
RETURN
name,
options.indexConfig['vector.dimensions'] AS dimensions,
options.indexConfig['vector.similarity_function'] AS similarity;

View File

@@ -0,0 +1,8 @@
// Check all Neo4j indexes
SHOW INDEXES;
// Check specifically for vector indexes
SHOW INDEXES YIELD name, type WHERE type = 'VECTOR' RETURN name, type;
// Check for specific vector indexes we need
SHOW INDEXES YIELD name WHERE name IN ['entity_embedding', 'statement_embedding', 'episode_embedding'] RETURN name;

View File

@@ -0,0 +1,22 @@
// Drop existing vector indexes if they exist (to handle dimension mismatches)
DROP INDEX entity_embedding IF EXISTS;
DROP INDEX statement_embedding IF EXISTS;
DROP INDEX episode_embedding IF EXISTS;
// Create vector indexes with 1536 dimensions (for text-embedding-3-small)
// Change the vector.dimensions value if using a different embedding model:
// - text-embedding-3-small: 1536
// - text-embedding-3-large: 3072
// - mxbai-embed-large (Ollama): 1024
CREATE VECTOR INDEX entity_embedding IF NOT EXISTS FOR (n:Entity) ON n.nameEmbedding
OPTIONS {indexConfig: {`vector.dimensions`: 1536, `vector.similarity_function`: 'cosine', `vector.hnsw.ef_construction`: 400, `vector.hnsw.m`: 32}};
CREATE VECTOR INDEX statement_embedding IF NOT EXISTS FOR (n:Statement) ON n.factEmbedding
OPTIONS {indexConfig: {`vector.dimensions`: 1536, `vector.similarity_function`: 'cosine', `vector.hnsw.ef_construction`: 400, `vector.hnsw.m`: 32}};
CREATE VECTOR INDEX episode_embedding IF NOT EXISTS FOR (n:Episode) ON n.contentEmbedding
OPTIONS {indexConfig: {`vector.dimensions`: 1536, `vector.similarity_function`: 'cosine', `vector.hnsw.ef_construction`: 400, `vector.hnsw.m`: 32}};
// Verify indexes were created
SHOW INDEXES YIELD name, type WHERE type = 'VECTOR' RETURN name, type;

scripts/fix-neo4j-indexes.sh (new executable file, 91 lines)
View File

@@ -0,0 +1,91 @@
#!/bin/bash
# Script to fix Neo4j vector indexes
# This script recreates the vector indexes with the correct dimensions
set -e
echo "========================================="
echo "Neo4j Vector Index Fix Script"
echo "========================================="
echo ""
# Default values
NEO4J_URI=${NEO4J_URI:-"bolt://localhost:7687"}
NEO4J_USERNAME=${NEO4J_USERNAME:-"neo4j"}
NEO4J_PASSWORD=${NEO4J_PASSWORD:-""}
# Check if running in Docker
if [ -f "/.dockerenv" ] || [ -f "/run/.containerenv" ]; then
echo "Running inside Docker container"
NEO4J_URI=${NEO4J_URI:-"bolt://neo4j:7687"}
fi
echo "Neo4j URI: $NEO4J_URI"
echo ""
# Check if cypher-shell is available
if ! command -v cypher-shell &> /dev/null; then
echo "ERROR: cypher-shell not found!"
echo ""
echo "Options:"
echo "1. Run this script inside the Neo4j container:"
echo " docker exec -it <neo4j-container> bash"
echo " cd /var/lib/neo4j"
echo " /path/to/this/script.sh"
echo ""
echo "2. Install Neo4j shell tools locally"
echo ""
echo "3. Use Neo4j Browser at http://localhost:7474"
echo " and run the queries from: scripts/create-vector-indexes.cypher"
exit 1
fi
echo "Step 1: Checking current indexes..."
echo "-----------------------------------"
cypher-shell -u "$NEO4J_USERNAME" -p "$NEO4J_PASSWORD" -a "$NEO4J_URI" \
"SHOW INDEXES YIELD name, type WHERE type = 'VECTOR' RETURN name, type;"
echo ""
echo "Step 2: Dropping existing vector indexes..."
echo "--------------------------------------------"
cypher-shell -u "$NEO4J_USERNAME" -p "$NEO4J_PASSWORD" -a "$NEO4J_URI" \
"DROP INDEX entity_embedding IF EXISTS; DROP INDEX statement_embedding IF EXISTS; DROP INDEX episode_embedding IF EXISTS;"
echo "Indexes dropped successfully"
echo ""
echo "Step 3: Creating new vector indexes with 1536 dimensions..."
echo "------------------------------------------------------------"
# Read EMBEDDING_MODEL_SIZE from environment or use default
DIMENSIONS=${EMBEDDING_MODEL_SIZE:-1536}
echo "Using $DIMENSIONS dimensions (configure with EMBEDDING_MODEL_SIZE env var)"
echo ""
cypher-shell -u "$NEO4J_USERNAME" -p "$NEO4J_PASSWORD" -a "$NEO4J_URI" <<EOF
CREATE VECTOR INDEX entity_embedding IF NOT EXISTS FOR (n:Entity) ON n.nameEmbedding
OPTIONS {indexConfig: {\`vector.dimensions\`: $DIMENSIONS, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}};
CREATE VECTOR INDEX statement_embedding IF NOT EXISTS FOR (n:Statement) ON n.factEmbedding
OPTIONS {indexConfig: {\`vector.dimensions\`: $DIMENSIONS, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}};
CREATE VECTOR INDEX episode_embedding IF NOT EXISTS FOR (n:Episode) ON n.contentEmbedding
OPTIONS {indexConfig: {\`vector.dimensions\`: $DIMENSIONS, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}};
EOF
echo "Indexes created successfully"
echo ""
echo "Step 4: Verifying new indexes..."
echo "---------------------------------"
cypher-shell -u "$NEO4J_USERNAME" -p "$NEO4J_PASSWORD" -a "$NEO4J_URI" \
"SHOW INDEXES YIELD name, type, labelsOrTypes, properties WHERE type = 'VECTOR' RETURN name, type, labelsOrTypes, properties;"
echo ""
echo "========================================="
echo "✓ Vector indexes fixed successfully!"
echo "========================================="
echo ""
echo "IMPORTANT: Existing embeddings in your database may have wrong dimensions."
echo "You may need to re-process your data to regenerate embeddings with the new dimensions."
echo ""