From 769c79f77340a65836ffc0bc7afc3d9f6269c54e Mon Sep 17 00:00:00 2001
From: Manoj K <saimanoj58@gmail.com>
Date: Sat, 12 Jul 2025 08:57:12 +0530
Subject: [PATCH] Feat: add ingestion rules and web search to chat

---
 .../app/routes/api.v1.ingestion-rules.$id.tsx |  85 ++++++++++
 .../app/routes/api.v1.ingestion-rules.tsx     | 147 ++++++++++++++++++
 .../app/services/knowledgeGraph.server.ts     |  59 +++++++
 apps/webapp/app/services/prompts/normalize.ts |   6 +
 apps/webapp/app/trigger/chat/chat-utils.ts    |  20 ++-
 apps/webapp/app/trigger/chat/prompt.ts        |  68 ++++++--
 apps/webapp/app/trigger/utils/types.ts        |  65 ++++++++
 apps/webapp/app/trigger/utils/utils.ts        |  73 ++++++++-
 apps/webapp/package.json                      |   1 +
 apps/webapp/prisma/schema.prisma              |  20 +++
 .../migration.sql                             |  34 ++++
 packages/database/prisma/schema.prisma        |  20 +++
 pnpm-lock.yaml                                |  48 ++++++
 13 files changed, 634 insertions(+), 12 deletions(-)
 create mode 100644 apps/webapp/app/routes/api.v1.ingestion-rules.$id.tsx
 create mode 100644 apps/webapp/app/routes/api.v1.ingestion-rules.tsx
 create mode 100644 packages/database/prisma/migrations/20250712032515_add_ingestion_rule_model/migration.sql

diff --git a/apps/webapp/app/routes/api.v1.ingestion-rules.$id.tsx b/apps/webapp/app/routes/api.v1.ingestion-rules.$id.tsx
new file mode 100644
index 0000000..6859f94
--- /dev/null
+++ b/apps/webapp/app/routes/api.v1.ingestion-rules.$id.tsx
@@ -0,0 +1,85 @@
+import { json } from "@remix-run/node";
+import { z } from "zod";
+
+import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server";
+import { prisma } from "~/db.server";
+import { logger } from "~/services/logger.service";
+
+const ParamsSchema = z.object({
+  id: z.string(),
+});
+
+const { action, loader } = createActionApiRoute(
+  {
+    params: ParamsSchema,
+    allowJWT: true,
+    authorization: {
+      action: "manage",
+    },
+    corsStrategy: "all",
+  },
+  async ({ params, authentication, request }) => {
+    try {
+      const user = await prisma.user.findUnique({
+        where: { id: authentication.userId },
+        include: { Workspace: true },
+      });
+
+      if (!user?.Workspace) {
+        throw new Error("User workspace not found");
+      }
+
+      if (request.method === "DELETE") {
+        logger.log("Deleting ingestion rule", { ruleId: params.id, userId: authentication.userId });
+
+        // Soft delete by setting deleted timestamp
+        const rule = await prisma.ingestionRule.update({
+          where: {
+            id: params.id,
+            workspaceId: user.Workspace.id, // Ensure user can only delete their workspace rules
+          },
+          data: {
+            deleted: new Date(),
+          },
+        });
+
+        return json({
+          success: true,
+          message: "Rule deleted successfully",
+          ruleId: rule.id,
+        });
+      } else if (request.method === "GET") {
+        // Get single rule
+        const rule = await prisma.ingestionRule.findFirst({
+          where: {
+            id: params.id,
+            workspaceId: user.Workspace.id,
+            deleted: null,
+          },
+          select: {
+            id: true,
+            name: true,
+            text: true,
+            source: true,
+            isActive: true,
+            createdAt: true,
+            updatedAt: true,
+          },
+        });
+
+        if (!rule) {
+          return json({ success: false, message: "Rule not found" }, { status: 404 });
+        }
+
+        return json({ success: true, rule });
+      }
+
+      return json({ success: false, message: "Method not supported" }, { status: 405 });
+    } catch (error) {
+      logger.error("Failed to manage ingestion rule", { error, ruleId: params.id });
+      throw error;
+    }
+  },
+);
+
+export { action, loader };
\ No newline at end of file
diff --git a/apps/webapp/app/routes/api.v1.ingestion-rules.tsx b/apps/webapp/app/routes/api.v1.ingestion-rules.tsx
new file mode 100644
index 0000000..0db87ca
--- /dev/null
+++ b/apps/webapp/app/routes/api.v1.ingestion-rules.tsx
@@ -0,0 +1,147 @@
+import { json } from "@remix-run/node";
+import { z } from "zod";
+
+import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server";
+import { prisma } from "~/db.server";
+import { logger } from "~/services/logger.service";
+
+const IngestionRuleCreateSchema = z.object({
+  name: z.string().optional(),
+  text: z.string().min(1, "Rule text is required"),
+  source: z.string().min(1, "Source is required"),
+  isActive: z.boolean().default(true),
+});
+
+const IngestionRuleUpdateSchema = z.object({
+  id: z.string(),
+  name: z.string().optional(),
+  text: z.string().min(1, "Rule text is required").optional(),
+  source: z.string().min(1, "Source is required").optional(),
+  isActive: z.boolean().optional(),
+});
+
+const { action, loader } = createActionApiRoute(
+  {
+    body: z.union([IngestionRuleCreateSchema, IngestionRuleUpdateSchema]),
+    allowJWT: true,
+    authorization: {
+      action: "manage",
+    },
+    corsStrategy: "all",
+  },
+  async ({ body, authentication, request }) => {
+    try {
+      const user = await prisma.user.findUnique({
+        where: { id: authentication.userId },
+        include: { Workspace: true },
+      });
+
+      if (!user?.Workspace) {
+        throw new Error("User workspace not found");
+      }
+
+      if (request.method === "POST") {
+        // Create new rule
+        const createData = body as z.infer<typeof IngestionRuleCreateSchema>;
+        
+        logger.log("Creating ingestion rule", { createData, userId: authentication.userId });
+
+        const rule = await prisma.ingestionRule.create({
+          data: {
+            name: createData.name,
+            text: createData.text,
+            source: createData.source,
+            isActive: createData.isActive,
+            workspaceId: user.Workspace.id,
+            userId: authentication.userId,
+          },
+        });
+
+        return json({
+          success: true,
+          rule: {
+            id: rule.id,
+            name: rule.name,
+            text: rule.text,
+            source: rule.source,
+            isActive: rule.isActive,
+            createdAt: rule.createdAt,
+            updatedAt: rule.updatedAt,
+          },
+        });
+      } else if (request.method === "PUT") {
+        // Update existing rule
+        const updateData = body as z.infer<typeof IngestionRuleUpdateSchema>;
+        
+        logger.log("Updating ingestion rule", { updateData, userId: authentication.userId });
+
+        const rule = await prisma.ingestionRule.update({
+          where: {
+            id: updateData.id,
+            workspaceId: user.Workspace.id, // Ensure user can only update their workspace rules
+          },
+          data: {
+            ...(updateData.name !== undefined && { name: updateData.name }),
+            ...(updateData.text && { text: updateData.text }),
+            ...(updateData.source && { source: updateData.source }),
+            ...(updateData.isActive !== undefined && { isActive: updateData.isActive }),
+          },
+        });
+
+        return json({
+          success: true,
+          rule: {
+            id: rule.id,
+            name: rule.name,
+            text: rule.text,
+            source: rule.source,
+            isActive: rule.isActive,
+            createdAt: rule.createdAt,
+            updatedAt: rule.updatedAt,
+          },
+        });
+      } else if (request.method === "GET") {
+        // List rules
+        const url = new URL(request.url);
+        const source = url.searchParams.get("source");
+        const isActive = url.searchParams.get("isActive");
+
+        const where: any = {
+          workspaceId: user.Workspace.id,
+          deleted: null,
+        };
+
+        if (source) {
+          where.source = source;
+        }
+
+        if (isActive !== null) {
+          where.isActive = isActive === "true";
+        }
+
+        const rules = await prisma.ingestionRule.findMany({
+          where,
+          orderBy: { createdAt: "desc" },
+          select: {
+            id: true,
+            name: true,
+            text: true,
+            source: true,
+            isActive: true,
+            createdAt: true,
+            updatedAt: true,
+          },
+        });
+
+        return json({ success: true, rules });
+      }
+
+      return json({ success: false, message: "Method not supported" }, { status: 405 });
+    } catch (error) {
+      logger.error("Failed to manage ingestion rules", { error, body });
+      throw error;
+    }
+  },
+);
+
+export { action, loader };
\ No newline at end of file
diff --git a/apps/webapp/app/services/knowledgeGraph.server.ts b/apps/webapp/app/services/knowledgeGraph.server.ts
index 00aa0aa..46fac96 100644
--- a/apps/webapp/app/services/knowledgeGraph.server.ts
+++ b/apps/webapp/app/services/knowledgeGraph.server.ts
@@ -947,12 +947,16 @@ export class KnowledgeGraphService {
     }
     const entityTypes = getNodeTypesString(appEnumValues);
     const relatedMemories = await this.getRelatedMemories(episodeBody, userId);
+    
+    // Fetch ingestion rules for this source
+    const ingestionRules = await this.getIngestionRulesForSource(source, userId);
 
     const context = {
       episodeContent: episodeBody,
       entityTypes: entityTypes,
       source,
       relatedMemories,
+      ingestionRules,
     };
     const messages = normalizePrompt(context);
     let responseText = "";
@@ -1035,4 +1039,59 @@ export class KnowledgeGraphService {
       return "";
     }
   }
+
+  /**
+   * Retrieves active ingestion rules for a specific source and user
+   */
+  private async getIngestionRulesForSource(
+    source: string,
+    userId: string,
+  ): Promise<string | null> {
+    try {
+      // Import prisma here to avoid circular dependencies
+      const { prisma } = await import("~/db.server");
+
+      // Get the user's workspace
+      const user = await prisma.user.findUnique({
+        where: { id: userId },
+        include: { Workspace: true },
+      });
+
+      if (!user?.Workspace) {
+        return null;
+      }
+
+      // Fetch active rules for this source
+      const rules = await prisma.ingestionRule.findMany({
+        where: {
+          source,
+          workspaceId: user.Workspace.id,
+          isActive: true,
+          deleted: null,
+        },
+        select: {
+          text: true,
+          name: true,
+        },
+        orderBy: { createdAt: "asc" },
+      });
+
+      if (rules.length === 0) {
+        return null;
+      }
+
+      // Format rules for the prompt
+      const formattedRules = rules
+        .map((rule, index) => {
+          const ruleName = rule.name ? `${rule.name}: ` : `Rule ${index + 1}: `;
+          return `${ruleName}${rule.text}`;
+        })
+        .join("\n");
+
+      return formattedRules;
+    } catch (error) {
+      console.error("Error retrieving ingestion rules:", error);
+      return null;
+    }
+  }
 }
diff --git a/apps/webapp/app/services/prompts/normalize.ts b/apps/webapp/app/services/prompts/normalize.ts
index 0ce567f..51c73c8 100644
--- a/apps/webapp/app/services/prompts/normalize.ts
+++ b/apps/webapp/app/services/prompts/normalize.ts
@@ -34,6 +34,12 @@ When related memories are provided, make memory graph evolution your PRIMARY GOA
 ## Node Entity Types
 ${context.entityTypes}
 
+## Ingestion Rules
+${context.ingestionRules ? `The following rules apply to content from ${context.source}:
+${context.ingestionRules}
+
+IMPORTANT: If the content does NOT satisfy these rules, respond with "NOTHING_TO_REMEMBER" regardless of other criteria.` : 'No specific ingestion rules defined for this source.'}
+
 ## Related Memory Processing Strategy
 When related memories are provided, apply this filtering and enhancement strategy:
 
diff --git a/apps/webapp/app/trigger/chat/chat-utils.ts b/apps/webapp/app/trigger/chat/chat-utils.ts
index 9e53a60..fba87d5 100644
--- a/apps/webapp/app/trigger/chat/chat-utils.ts
+++ b/apps/webapp/app/trigger/chat/chat-utils.ts
@@ -17,12 +17,13 @@ import { generate, processTag } from "./stream-utils";
 import { type AgentMessage, AgentMessageType, Message } from "./types";
 import { type MCP } from "../utils/mcp";
 import {
+  WebSearchSchema,
   type ExecutionState,
   type HistoryStep,
   type Resource,
   type TotalCost,
 } from "../utils/types";
-import { flattenObject } from "../utils/utils";
+import { flattenObject, webSearch } from "../utils/utils";
 import { searchMemory, addMemory } from "./memory-utils";
 
 interface LLMOutputInterface {
@@ -100,6 +101,12 @@ const addMemoryTool = tool({
   }),
 });
 
+const websearchTool = tool({
+  description:
+    "Search the web for current information and news. Use this when you need up-to-date information that might not be in your training data. Try different search strategies: broad terms first, then specific phrases, keywords, exact quotes. Use multiple searches with varied approaches to get comprehensive results.",
+  parameters: WebSearchSchema,
+});
+
 const internalTools = [
   "core--progress_update",
   "core--search_memory",
@@ -258,6 +265,7 @@ export async function* run(
     "core--progress_update": progressUpdateTool,
     "core--search_memory": searchMemoryTool,
     "core--add_memory": addMemoryTool,
+    "core--websearch": websearchTool,
   };
 
   logger.info("Tools have been formed");
@@ -514,6 +522,16 @@ export async function* run(
                   result =
                     "Memory storage failed - please check your memory configuration";
                 }
+              } else if (toolName === "websearch") {
+                try {
+                  result = await webSearch(skillInput);
+                } catch (apiError) {
+                  logger.error("Web search failed", {
+                    apiError,
+                  });
+                  result =
+                    "Web search failed - please check your search configuration";
+                }
               }
             }
             // Handle other MCP tools
diff --git a/apps/webapp/app/trigger/chat/prompt.ts b/apps/webapp/app/trigger/chat/prompt.ts
index 75e694b..93fd789 100644
--- a/apps/webapp/app/trigger/chat/prompt.ts
+++ b/apps/webapp/app/trigger/chat/prompt.ts
@@ -1,19 +1,65 @@
 export const REACT_SYSTEM_PROMPT = `
-You are a helpful AI assistant with access to user memory. Your primary capabilities are:
+You are a helpful AI assistant with access to user memory and web search capabilities. Your primary capabilities are:
 
 1. **Memory-First Approach**: Always check user memory first to understand context and previous interactions
-2. **Memory Management**: Help users store, retrieve, and organize information in their memory
-3. **Contextual Assistance**: Use memory to provide personalized and contextual responses
+2. **Intelligent Information Gathering**: Analyze queries to determine if current information is needed
+3. **Memory Management**: Help users store, retrieve, and organize information in their memory
+4. **Contextual Assistance**: Use memory to provide personalized and contextual responses
 
 <context>
 {{CONTEXT}}
 </context>
 
-<memory>
-- Always check memory FIRST using core--search_memory before any other actions
-- Consider this your highest priority for EVERY interaction - as essential as breathing
-- Make memory checking your first tool call before any other operations
+<information_gathering>
+Follow this intelligent approach for information gathering:
 
+1. **MEMORY FIRST** (Always Required)
+   - Always check memory FIRST using core--search_memory before any other actions
+   - Consider this your highest priority for EVERY interaction - as essential as breathing
+   - Memory provides context, personal preferences, and historical information
+   - Use memory to understand user's background, ongoing projects, and past conversations
+
+2. **QUERY ANALYSIS** (Determine Information Needs)
+   Analyze the user's query to identify if it requires current/latest information:
+   
+   **Use web search (core--websearch) when query involves:**
+   - Current events, news, or recent developments
+   - "Latest", "recent", "current", "today", "now" keywords
+   - Stock prices, market data, or financial information
+   - Software updates, version releases, or technical documentation
+   - Weather, traffic, or real-time data
+   - Recent changes to websites, APIs, or services
+   - Product releases, availability, or pricing
+   - Breaking news or trending topics
+   - Verification of potentially outdated information
+
+   **Examples requiring web search:**
+   - "What's the latest news about..."
+   - "Current price of..."
+   - "Recent updates to..."
+   - "What happened today..."
+   - "Latest version of..."
+
+3. **INFORMATION SYNTHESIS** (Combine Sources)
+   - Combine memory context with web search results when both are relevant
+   - Use memory to personalize current information based on user preferences
+   - Cross-reference web findings with user's historical interests from memory
+   - Always store new useful information in memory using core--add_memory
+
+4. **TRAINING KNOWLEDGE** (Foundation)
+   - Use your training knowledge as the foundation for analysis and explanation
+   - Apply training knowledge to interpret and contextualize information from memory and web
+   - Fill gaps where memory and web search don't provide complete answers
+   - Indicate when you're using training knowledge vs. live information sources
+
+EXECUTION APPROACH:
+- Memory search is mandatory for every interaction
+- Web search is conditional based on query analysis
+- Both can be executed in parallel when web search is needed
+- Always indicate your information sources in responses
+</information_gathering>
+
+<memory>
 QUERY FORMATION:
 - Write specific factual statements as queries (e.g., "user email address" not "what is the user's email?")
 - Create multiple targeted memory queries for complex requests
@@ -49,7 +95,7 @@ MEMORY USAGE:
 - Blend memory insights naturally into responses
 - Verify you've checked relevant memory before finalizing ANY response
 
-If memory access is unavailable, rely only on the current conversation or ask user
+If memory access is unavailable, proceed to web search or rely on current conversation
 </memory>
 
 <tool_calling>
@@ -58,6 +104,7 @@ You have tools at your disposal to assist users:
 CORE PRINCIPLES:
 - Use tools only when necessary for the task at hand
 - Always check memory FIRST before making other tool calls
+- Use web search when query analysis indicates need for current information
 - Execute multiple operations in parallel whenever possible
 - Use sequential calls only when output of one is required for input of another
 
@@ -73,7 +120,7 @@ PARAMETER HANDLING:
 
 TOOL SELECTION:
 - Never call tools not provided in this conversation
-- Skip tool calls for general questions you can answer directly
+- Skip tool calls for general questions you can answer directly from memory/knowledge
 - For identical operations on multiple items, use parallel tool calls
 - Default to parallel execution (3-5× faster than sequential calls)
 - You can always access external service tools by loading them with load_mcp first
@@ -106,7 +153,7 @@ QUESTIONS - When you need information:
 <p>[Your question with HTML formatting]</p>
 </question_response>
 
-- Ask questions only when you cannot find information through memory or tools
+- Ask questions only when you cannot find information through memory, web search, or tools
 - Be specific about what you need to know
 - Provide context for why you're asking
 
@@ -120,6 +167,7 @@ CRITICAL:
 - Apply proper HTML formatting (<h1>, <h2>, <p>, <ul>, <li>, etc.)
 - Never mix communication formats
 - Keep responses clear and helpful
+- Always indicate your information sources (memory, web search, and/or knowledge)
 </communication>
 `;
 
diff --git a/apps/webapp/app/trigger/utils/types.ts b/apps/webapp/app/trigger/utils/types.ts
index 2f3788a..5ef0252 100644
--- a/apps/webapp/app/trigger/utils/types.ts
+++ b/apps/webapp/app/trigger/utils/types.ts
@@ -1,5 +1,6 @@
 import { type ActionStatusEnum } from "@core/types";
 import { type CoreMessage } from "ai";
+import { z } from "zod";
 
 // Define types for the MCP tool schema
 export interface MCPTool {
@@ -121,3 +122,67 @@ export interface GenerateResponse {
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
   toolCalls: any[];
 }
+
+export interface WebSearchResult {
+  results: Array<{
+    title: string;
+    url: string;
+    content: string;
+    publishedDate: string;
+    highlights: string[];
+    text: string;
+    score: number;
+  }>;
+}
+
+export const WebSearchSchema = z.object({
+  query: z
+    .string()
+    .min(1)
+    .describe("The search query to find relevant web content"),
+  numResults: z
+    .number()
+    .min(1)
+    .max(20)
+    .optional()
+    .default(5)
+    .describe("Number of results to return (1-20, default: 5)"),
+  includeContent: z
+    .boolean()
+    .optional()
+    .default(false)
+    .describe("Whether to include full page content in results"),
+  includeHighlights: z
+    .boolean()
+    .optional()
+    .default(false)
+    .describe("Whether to include relevant text highlights from pages"),
+  domains: z
+    .array(z.string())
+    .optional()
+    .describe(
+      'Array of domains to include in search (e.g., ["github.com", "stackoverflow.com"])',
+    ),
+  excludeDomains: z
+    .array(z.string())
+    .optional()
+    .describe("Array of domains to exclude from search"),
+  startCrawlDate: z
+    .string()
+    .optional()
+    .describe("Start date for content crawling in YYYY-MM-DD format"),
+  endCrawlDate: z
+    .string()
+    .optional()
+    .describe("End date for content crawling in YYYY-MM-DD format"),
+  startPublishedDate: z
+    .string()
+    .optional()
+    .describe("Start date for content publishing in YYYY-MM-DD format"),
+  endPublishedDate: z
+    .string()
+    .optional()
+    .describe("End date for content publishing in YYYY-MM-DD format"),
+});
+
+export type WebSearchArgs = z.infer<typeof WebSearchSchema>;
diff --git a/apps/webapp/app/trigger/utils/utils.ts b/apps/webapp/app/trigger/utils/utils.ts
index db3d586..8e7fea8 100644
--- a/apps/webapp/app/trigger/utils/utils.ts
+++ b/apps/webapp/app/trigger/utils/utils.ts
@@ -12,10 +12,15 @@ import {
 import { logger } from "@trigger.dev/sdk/v3";
 import { type CoreMessage } from "ai";
 
-import { type HistoryStep } from "./types";
+import {
+  type WebSearchArgs,
+  type WebSearchResult,
+  type HistoryStep,
+} from "./types";
 import axios from "axios";
 import nodeCrypto from "node:crypto";
 import { customAlphabet, nanoid } from "nanoid";
+import { Exa } from "exa-js";
 
 const prisma = new PrismaClient();
 
@@ -542,3 +547,69 @@ export async function deletePersonalAccessToken(tokenId: string) {
     },
   });
 }
+
+export async function webSearch(args: WebSearchArgs): Promise<WebSearchResult> {
+  const apiKey = process.env.EXA_API_KEY;
+
+  if (!apiKey) {
+    throw new Error(
+      "EXA_API_KEY environment variable is required for web search",
+    );
+  }
+
+  const exa = new Exa(apiKey);
+
+  try {
+    const searchOptions = {
+      numResults: args.numResults || 5,
+      ...(args.domains && { includeDomains: args.domains }),
+      ...(args.excludeDomains && { excludeDomains: args.excludeDomains }),
+      ...(args.startCrawlDate && { startCrawlDate: args.startCrawlDate }),
+      ...(args.endCrawlDate && { endCrawlDate: args.endCrawlDate }),
+      ...(args.startPublishedDate && {
+        startPublishedDate: args.startPublishedDate,
+      }),
+      ...(args.endPublishedDate && { endPublishedDate: args.endPublishedDate }),
+    };
+
+    let result;
+
+    if (args.includeContent || args.includeHighlights) {
+      // Use searchAndContents for rich results
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const contentsOptions: any = {
+        ...searchOptions,
+      };
+
+      if (args.includeContent) {
+        contentsOptions.text = true;
+      }
+
+      if (args.includeHighlights) {
+        contentsOptions.highlights = true;
+      }
+
+      result = await exa.searchAndContents(args.query, contentsOptions);
+    } else {
+      // Use basic search for URLs only
+      result = await exa.search(args.query, searchOptions);
+    }
+
+    return {
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      results: result.results.map((item: any) => ({
+        title: item.title,
+        url: item.url,
+        content: item.text,
+        publishedDate: item.publishedDate,
+        highlights: item.highlights,
+        text: item.text,
+        score: item.score,
+      })),
+    };
+  } catch (error) {
+    throw new Error(
+      `Web search failed: ${error instanceof Error ? error.message : "Unknown error"}`,
+    );
+  }
+}
diff --git a/apps/webapp/package.json b/apps/webapp/package.json
index 87b4bf4..14b6e55 100644
--- a/apps/webapp/package.json
+++ b/apps/webapp/package.json
@@ -86,6 +86,7 @@
     "date-fns": "^4.1.0",
     "dayjs": "^1.11.10",
     "emails": "workspace:*",
+    "exa-js": "^1.8.20",
     "execa": "^9.6.0",
     "express": "^4.18.1",
     "fast-sort": "^3.4.0",
diff --git a/apps/webapp/prisma/schema.prisma b/apps/webapp/prisma/schema.prisma
index 2d80e73..996ed8c 100644
--- a/apps/webapp/prisma/schema.prisma
+++ b/apps/webapp/prisma/schema.prisma
@@ -146,6 +146,24 @@ model IngestionQueue {
   processedAt DateTime?
 }
 
+model IngestionRule {
+  id        String    @id @default(uuid())
+  createdAt DateTime  @default(now())
+  updatedAt DateTime  @updatedAt
+  deleted   DateTime?
+
+  name     String? // Optional human-readable rule name
+  text     String // Free-flowing text rule description (mandatory)
+  source   String // Source/integration this rule applies to (mandatory)
+  isActive Boolean @default(true) // Enable/disable rule (mandatory)
+
+  workspace   Workspace @relation(fields: [workspaceId], references: [id])
+  workspaceId String
+
+  user   User   @relation(fields: [userId], references: [id])
+  userId String
+}
+
 model IntegrationAccount {
   id        String    @id @default(uuid())
   createdAt DateTime  @default(now())
@@ -296,6 +314,7 @@ model User {
   WebhookConfiguration WebhookConfiguration[]
   Conversation         Conversation[]
   ConversationHistory  ConversationHistory[]
+  IngestionRule        IngestionRule[]
 }
 
 model WebhookConfiguration {
@@ -351,6 +370,7 @@ model Workspace {
   Activity                Activity[]
   WebhookConfiguration    WebhookConfiguration[]
   Conversation            Conversation[]
+  IngestionRule           IngestionRule[]
 }
 
 enum AuthenticationMethod {
diff --git a/packages/database/prisma/migrations/20250712032515_add_ingestion_rule_model/migration.sql b/packages/database/prisma/migrations/20250712032515_add_ingestion_rule_model/migration.sql
new file mode 100644
index 0000000..79f8f94
--- /dev/null
+++ b/packages/database/prisma/migrations/20250712032515_add_ingestion_rule_model/migration.sql
@@ -0,0 +1,34 @@
+/*
+  Warnings:
+
+  - You are about to drop the column `preferences` on the `User` table. All the data in the column will be lost.
+  - You are about to drop the column `preferences` on the `Workspace` table. All the data in the column will be lost.
+
+*/
+-- AlterTable
+ALTER TABLE "User" DROP COLUMN "preferences";
+
+-- AlterTable
+ALTER TABLE "Workspace" DROP COLUMN "preferences";
+
+-- CreateTable
+CREATE TABLE "IngestionRule" (
+    "id" TEXT NOT NULL,
+    "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "updatedAt" TIMESTAMP(3) NOT NULL,
+    "deleted" TIMESTAMP(3),
+    "name" TEXT,
+    "text" TEXT NOT NULL,
+    "source" TEXT NOT NULL,
+    "isActive" BOOLEAN NOT NULL DEFAULT true,
+    "workspaceId" TEXT NOT NULL,
+    "userId" TEXT NOT NULL,
+
+    CONSTRAINT "IngestionRule_pkey" PRIMARY KEY ("id")
+);
+
+-- AddForeignKey
+ALTER TABLE "IngestionRule" ADD CONSTRAINT "IngestionRule_workspaceId_fkey" FOREIGN KEY ("workspaceId") REFERENCES "Workspace"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
+
+-- AddForeignKey
+ALTER TABLE "IngestionRule" ADD CONSTRAINT "IngestionRule_userId_fkey" FOREIGN KEY ("userId") REFERENCES "User"("id") ON DELETE RESTRICT ON UPDATE CASCADE;
diff --git a/packages/database/prisma/schema.prisma b/packages/database/prisma/schema.prisma
index 2d80e73..996ed8c 100644
--- a/packages/database/prisma/schema.prisma
+++ b/packages/database/prisma/schema.prisma
@@ -146,6 +146,24 @@ model IngestionQueue {
   processedAt DateTime?
 }
 
+model IngestionRule {
+  id        String    @id @default(uuid())
+  createdAt DateTime  @default(now())
+  updatedAt DateTime  @updatedAt
+  deleted   DateTime?
+
+  name     String? // Optional human-readable rule name
+  text     String // Free-flowing text rule description (mandatory)
+  source   String // Source/integration this rule applies to (mandatory)
+  isActive Boolean @default(true) // Enable/disable rule (mandatory)
+
+  workspace   Workspace @relation(fields: [workspaceId], references: [id])
+  workspaceId String
+
+  user   User   @relation(fields: [userId], references: [id])
+  userId String
+}
+
 model IntegrationAccount {
   id        String    @id @default(uuid())
   createdAt DateTime  @default(now())
@@ -296,6 +314,7 @@ model User {
   WebhookConfiguration WebhookConfiguration[]
   Conversation         Conversation[]
   ConversationHistory  ConversationHistory[]
+  IngestionRule        IngestionRule[]
 }
 
 model WebhookConfiguration {
@@ -351,6 +370,7 @@ model Workspace {
   Activity                Activity[]
   WebhookConfiguration    WebhookConfiguration[]
   Conversation            Conversation[]
+  IngestionRule           IngestionRule[]
 }
 
 enum AuthenticationMethod {
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index a9ea7ad..c304a32 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -252,6 +252,9 @@ importers:
       emails:
         specifier: workspace:*
         version: link:../../packages/emails
+      exa-js:
+        specifier: ^1.8.20
+        version: 1.8.20(encoding@0.1.13)
       execa:
         specifier: ^9.6.0
         version: 9.6.0
@@ -5212,6 +5215,9 @@ packages:
     engines: {node: '>=10.14', npm: '>=6', yarn: '>=1'}
     hasBin: true
 
+  cross-fetch@4.1.0:
+    resolution: {integrity: sha512-uKm5PU+MHTootlWEY+mZ4vvXoCn4fLQxT9dSc1sXVMSFkINTJVN8cAQROpwcKm8bJ/c7rgZVIBWzH5T78sNZZw==}
+
   cross-spawn@5.1.0:
     resolution: {integrity: sha512-pTgQJ5KC0d2hcY8eyL1IzlBPYjTkyH72XRZPnLyKus2mBfNjQs3klqbJU2VILqZryAZUt9JOb3h/mWMy23/f5A==}
 
@@ -5610,6 +5616,10 @@ packages:
     resolution: {integrity: sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ==}
     engines: {node: '>=12'}
 
+  dotenv@16.4.7:
+    resolution: {integrity: sha512-47qPchRCykZC03FhkYAhrvwU4xDBFIj1QPqaarj6mdM/hgUzfPHcpkHJOn3mJAufFeeAxAzeGsr5X0M4k6fLZQ==}
+    engines: {node: '>=12'}
+
   dotenv@16.5.0:
     resolution: {integrity: sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==}
     engines: {node: '>=12'}
@@ -6018,6 +6028,9 @@ packages:
   evt@2.5.9:
     resolution: {integrity: sha512-GpjX476FSlttEGWHT8BdVMoI8wGXQGbEOtKcP4E+kggg+yJzXBZN2n4x7TS/zPBJ1DZqWI+rguZZApjjzQ0HpA==}
 
+  exa-js@1.8.20:
+    resolution: {integrity: sha512-FNxoaBOmGyfb4NoWn3Bf5QQGOeKGW7SZ0z4p7qRyoFueH8671XQ6VkYLLriaWSZQ42h7+7O9bAjLXBdYuQ2XMw==}
+
   execa@5.1.1:
     resolution: {integrity: sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==}
     engines: {node: '>=10'}
@@ -7790,6 +7803,18 @@ packages:
     resolution: {integrity: sha512-1FlR+gjXK7X+AsAHso35MnyN5KqGwJRi/31ft6x0M194ht7S+rWAvd7PHss9xSKMzE0asv1pyIHaJYq+BbacAQ==}
     engines: {node: '>=12'}
 
+  openai@5.9.0:
+    resolution: {integrity: sha512-cmLC0pfqLLhBGxE4aZPyRPjydgYCncppV2ClQkKmW79hNjCvmzkfhz8rN5/YVDmjVQlFV+UsF1JIuNjNgeagyQ==}
+    hasBin: true
+    peerDependencies:
+      ws: ^8.18.0
+      zod: ^3.23.8
+    peerDependenciesMeta:
+      ws:
+        optional: true
+      zod:
+        optional: true
+
   optionator@0.9.4:
     resolution: {integrity: sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==}
     engines: {node: '>= 0.8.0'}
@@ -15268,6 +15293,12 @@ snapshots:
     dependencies:
       cross-spawn: 7.0.6
 
+  cross-fetch@4.1.0(encoding@0.1.13):
+    dependencies:
+      node-fetch: 2.7.0(encoding@0.1.13)
+    transitivePeerDependencies:
+      - encoding
+
   cross-spawn@5.1.0:
     dependencies:
       lru-cache: 4.1.5
@@ -15675,6 +15706,8 @@ snapshots:
 
   dotenv@16.0.3: {}
 
+  dotenv@16.4.7: {}
+
   dotenv@16.5.0: {}
 
   dotenv@8.6.0: {}
@@ -16371,6 +16404,17 @@ snapshots:
       run-exclusive: 2.2.19
       tsafe: 1.8.5
 
+  exa-js@1.8.20(encoding@0.1.13):
+    dependencies:
+      cross-fetch: 4.1.0(encoding@0.1.13)
+      dotenv: 16.4.7
+      openai: 5.9.0(zod@3.23.8)
+      zod: 3.23.8
+      zod-to-json-schema: 3.24.5(zod@3.23.8)
+    transitivePeerDependencies:
+      - encoding
+      - ws
+
   execa@5.1.1:
     dependencies:
       cross-spawn: 7.0.6
@@ -18570,6 +18614,10 @@ snapshots:
     dependencies:
       mimic-fn: 4.0.0
 
+  openai@5.9.0(zod@3.23.8):
+    optionalDependencies:
+      zod: 3.23.8
+
   optionator@0.9.4:
     dependencies:
       deep-is: 0.1.4