Fix: Semantic Search issue (#89)

* Fix: normalization prompt

* Fix: improve knowledge graph and better recall

* fix: add user context to search reranking

* fix: log the source in search

* fix: remove hardcoded limit

---------

Co-authored-by: Harshith Mullapudi <harshithmullapudi@gmail.com>
This commit is contained in:
Manoj 2025-10-06 14:06:52 +05:30 committed by GitHub
parent 3d1b93d97d
commit 27f8740691
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 580 additions and 734 deletions

View File

@ -83,8 +83,9 @@ export async function makeModelCall(
const generateTextOptions: any = {} const generateTextOptions: any = {}
logger.info(
console.log('complexity:', complexity, 'model:', model) `complexity: ${complexity}, model: ${model}`,
);
switch (model) { switch (model) {
case "gpt-4.1-2025-04-14": case "gpt-4.1-2025-04-14":
case "gpt-4.1-mini-2025-04-14": case "gpt-4.1-mini-2025-04-14":

View File

@ -1,315 +0,0 @@
import { json } from "@remix-run/node";
import { randomUUID } from "node:crypto";
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
import { z } from "zod";
import { createHybridActionApiRoute } from "~/services/routeBuilders/apiBuilder.server";
import { addToQueue } from "~/lib/ingest.server";
import { SearchService } from "~/services/search.server";
import { handleTransport } from "~/utils/mcp";
import { SpaceService } from "~/services/space.server";
import { EpisodeTypeEnum } from "@core/types";
// Registry of active MCP transports keyed by session ID. Each entry records
// its creation time for cleanup tracking.
// NOTE(review): nothing in this file periodically reaps stale entries — only
// transport.onclose and the DELETE handler remove them. Confirm `createdAt`
// is consumed by a cleanup job elsewhere.
const transports: {
  [sessionId: string]: {
    transport: StreamableHTTPServerTransport;
    createdAt: number;
  };
} = {};

// MCP request body schema — accepts any JSON-RPC envelope; the SDK validates
// the actual method/params, so we only pass the body through.
const MCPRequestSchema = z.object({}).passthrough();

// Optional `source` query parameter (fallback when the `source` header is absent).
const SourceParams = z.object({
  source: z.string().optional(),
});

// Input schema for the MCP `search` tool.
const SearchParamsSchema = z.object({
  query: z.string().describe("The search query in third person perspective"),
  validAt: z.string().optional().describe("The valid at time in ISO format"),
  startTime: z.string().optional().describe("The start time in ISO format"),
  endTime: z.string().optional().describe("The end time in ISO format"),
  spaceIds: z
    .array(z.string())
    .optional()
    .describe("Array of strings representing UUIDs of spaces"),
});

// Input schema for the MCP `ingest` tool.
const IngestSchema = z.object({
  message: z.string().describe("The data to ingest in text format"),
});

// Module-level service singletons shared across requests.
const searchService = new SearchService();
const spaceService = new SpaceService();
/**
 * Handle an MCP HTTP POST request.
 *
 * Resolves the required `source` (the `source` header wins over the query
 * param), then either reuses the transport stored for the `mcp-session-id`
 * header, or — for an initialize request with no session — creates a fresh
 * transport plus an McpServer exposing three tools (`ingest`, `search`,
 * `get_spaces`) bound to the caller's userId and source. Finally delegates
 * the request/response exchange to `handleTransport`.
 *
 * Returns a Remix `json` JSON-RPC error payload on failure:
 * 400 when no source is supplied, 500 for any error thrown while handling.
 */
const handleMCPRequest = async (
  request: Request,
  body: any,
  authentication: any,
  params: z.infer<typeof SourceParams>,
) => {
  const sessionId = request.headers.get("mcp-session-id") as string | undefined;
  // Header takes precedence; query param is the fallback.
  const source =
    (request.headers.get("source") as string | undefined) ??
    (params.source as string | undefined);
  if (!source) {
    // A source is mandatory: it is attached to every ingested episode below.
    return json(
      {
        jsonrpc: "2.0",
        error: {
          code: -32601,
          message: "No source found",
        },
        id: null,
      },
      { status: 400 },
    );
  }
  let transport: StreamableHTTPServerTransport;
  try {
    if (sessionId && transports[sessionId]) {
      // Reuse existing transport for this session.
      transport = transports[sessionId].transport;
    } else if (!sessionId && isInitializeRequest(body)) {
      // New initialization request: build a transport whose session ID is
      // generated lazily; the callback closes over `transport` so the entry
      // can be registered once the SDK assigns the ID.
      transport = new StreamableHTTPServerTransport({
        sessionIdGenerator: () => randomUUID(),
        onsessioninitialized: (sessionId) => {
          // Store the transport by session ID with a creation timestamp.
          transports[sessionId] = {
            transport,
            createdAt: Date.now(),
          };
        },
      });
      // Clean up the registry entry when the transport closes.
      transport.onclose = () => {
        if (transport.sessionId) {
          delete transports[transport.sessionId];
        }
      };
      const server = new McpServer(
        {
          name: "echo-memory-server",
          version: "1.0.0",
        },
        {
          capabilities: {
            tools: {},
          },
        },
      );
      // Register ingest tool: queues the message as a conversation episode
      // attributed to the authenticated user and the resolved source.
      server.registerTool(
        "ingest",
        {
          title: "Ingest Data",
          description: "Ingest data into the memory system",
          inputSchema: IngestSchema.shape,
        },
        async (args) => {
          try {
            const userId = authentication.userId;
            // NOTE(review): `addToQueue` is not awaited here — presumably
            // intentional fire-and-queue, but the serialized `response` may
            // then be a pending promise; confirm against addToQueue's API.
            const response = addToQueue(
              {
                episodeBody: args.message,
                referenceTime: new Date().toISOString(),
                source,
                type: EpisodeTypeEnum.CONVERSATION,
              },
              userId,
            );
            return {
              content: [
                {
                  type: "text",
                  text: JSON.stringify(response),
                },
              ],
            };
          } catch (error) {
            console.error("MCP ingest error:", error);
            // Surface the failure to the MCP client rather than throwing.
            return {
              content: [
                {
                  type: "text",
                  text: `Error ingesting data: ${error instanceof Error ? error.message : String(error)}`,
                },
              ],
              isError: true,
            };
          }
        },
      );
      // Register search tool: runs the semantic search for the user.
      // NOTE(review): SearchParamsSchema also declares `validAt` and
      // `spaceIds`, but only startTime/endTime are forwarded below —
      // confirm whether the remaining params should be passed through.
      server.registerTool(
        "search",
        {
          title: "Search Data",
          description: "Search through ingested data",
          inputSchema: SearchParamsSchema.shape,
        },
        async (args) => {
          try {
            const userId = authentication.userId;
            const results = await searchService.search(args.query, userId, {
              startTime: args.startTime ? new Date(args.startTime) : undefined,
              endTime: args.endTime ? new Date(args.endTime) : undefined,
            });
            return {
              content: [
                {
                  type: "text",
                  text: JSON.stringify(results),
                },
              ],
            };
          } catch (error) {
            console.error("MCP search error:", error);
            return {
              content: [
                {
                  type: "text",
                  text: `Error searching: ${error instanceof Error ? error.message : String(error)}`,
                },
              ],
              isError: true,
            };
          }
        },
      );
      // Register get_spaces tool: lists the user's memory spaces.
      server.registerTool(
        "get_spaces",
        {
          title: "Get spaces",
          description: "Get spaces in memory",
        },
        async () => {
          try {
            const userId = authentication.userId;
            const spaces = await spaceService.getUserSpaces(userId);
            return {
              content: [
                {
                  type: "text",
                  text: JSON.stringify(spaces),
                },
              ],
              isError: false,
            };
          } catch (error) {
            console.error("Spaces error:", error);
            return {
              content: [
                {
                  type: "text",
                  text: `Error getting spaces`,
                },
              ],
              isError: true,
            };
          }
        },
      );
      // Connect to the MCP server before handling the request.
      await server.connect(transport);
    } else {
      // Neither a known session nor a valid initialize request.
      throw new Error("Bad Request: No valid session ID provided");
    }
    const response = await handleTransport(transport, request, body);
    return response;
  } catch (error) {
    console.error("MCP request error:", error);
    return json(
      {
        jsonrpc: "2.0",
        error: {
          code: -32000,
          message:
            error instanceof Error ? error.message : "Internal server error",
        },
        id: body?.id || null,
      },
      { status: 500 },
    );
  }
};
// Handle DELETE requests for session cleanup
const handleDelete = async (request: Request, authentication: any) => {
const sessionId = request.headers.get("mcp-session-id") as string | undefined;
if (!sessionId || !transports[sessionId]) {
return new Response("Invalid or missing session ID", { status: 400 });
}
const transport = transports[sessionId].transport;
// Clean up transport
transport.close();
delete transports[sessionId];
return new Response(null, { status: 204 });
};
/**
 * Route registration: a hybrid action/loader speaking MCP over HTTP.
 *
 * POST requests are handed to handleMCPRequest, DELETE requests to
 * handleDelete, and every other method gets a JSON-RPC 405 error.
 */
const { action, loader } = createHybridActionApiRoute(
  {
    body: MCPRequestSchema,
    searchParams: SourceParams,
    allowJWT: true,
    authorization: {
      action: "mcp",
    },
    corsStrategy: "all",
  },
  async ({ body, authentication, request, searchParams }) => {
    switch (request.method) {
      case "POST":
        return await handleMCPRequest(
          request,
          body,
          authentication,
          searchParams,
        );
      case "DELETE":
        return await handleDelete(request, authentication);
      default:
        return json(
          {
            jsonrpc: "2.0",
            error: {
              code: -32601,
              message: "Method not allowed",
            },
            id: null,
          },
          { status: 405 },
        );
    }
  },
);
export { action, loader };

View File

@ -278,7 +278,7 @@ export default function BillingSettings() {
</div> </div>
<div className="space-y-2"> <div className="space-y-2">
<div className="flex justify-between text-sm"> <div className="flex justify-between text-sm">
<span className="text-muted-foreground">Episodes</span> <span className="text-muted-foreground">Facts</span>
<span className="font-medium"> <span className="font-medium">
{usageSummary.usage.episodes} {usageSummary.usage.episodes}
</span> </span>

View File

@ -76,16 +76,19 @@ export async function findSimilarEntities(params: {
threshold: number; threshold: number;
userId: string; userId: string;
}): Promise<EntityNode[]> { }): Promise<EntityNode[]> {
const limit = params.limit || 5;
const query = ` const query = `
CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding) CALL db.index.vector.queryNodes('entity_embedding', ${limit*2}, $queryEmbedding)
YIELD node AS entity, score YIELD node AS entity
WHERE entity.userId = $userId
WITH entity, gds.similarity.cosine(entity.nameEmbedding, $queryEmbedding) AS score
WHERE score >= $threshold WHERE score >= $threshold
AND entity.userId = $userId
RETURN entity, score RETURN entity, score
ORDER BY score DESC ORDER BY score DESC
LIMIT ${limit}
`; `;
const result = await runQuery(query, { ...params, topK: params.limit }); const result = await runQuery(query, { ...params });
return result.map((record) => { return result.map((record) => {
const entity = record.get("entity").properties; const entity = record.get("entity").properties;
@ -110,17 +113,20 @@ export async function findSimilarEntitiesWithSameType(params: {
threshold: number; threshold: number;
userId: string; userId: string;
}): Promise<EntityNode[]> { }): Promise<EntityNode[]> {
const limit = params.limit || 5;
const query = ` const query = `
CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding) CALL db.index.vector.queryNodes('entity_embedding', ${limit*2}, $queryEmbedding)
YIELD node AS entity, score YIELD node AS entity
WHERE score >= $threshold WHERE entity.userId = $userId
AND entity.userId = $userId
AND entity.type = $entityType AND entity.type = $entityType
WITH entity, gds.similarity.cosine(entity.nameEmbedding, $queryEmbedding) AS score
WHERE score >= $threshold
RETURN entity, score RETURN entity, score
ORDER BY score DESC ORDER BY score DESC
LIMIT ${limit}
`; `;
const result = await runQuery(query, { ...params, topK: params.limit }); const result = await runQuery(query, { ...params });
return result.map((record) => { return result.map((record) => {
const entity = record.get("entity").properties; const entity = record.get("entity").properties;

View File

@ -138,19 +138,21 @@ export async function searchEpisodesByEmbedding(params: {
limit?: number; limit?: number;
minSimilarity?: number; minSimilarity?: number;
}) { }) {
const limit = params.limit || 100;
const query = ` const query = `
CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding) CALL db.index.vector.queryNodes('episode_embedding', ${limit*2}, $embedding)
YIELD node AS episode, score YIELD node AS episode
WHERE episode.userId = $userId WHERE episode.userId = $userId
AND score >= $minSimilarity WITH episode, gds.similarity.cosine(episode.contentEmbedding, $embedding) AS score
WHERE score >= $minSimilarity
RETURN episode, score RETURN episode, score
ORDER BY score DESC`; ORDER BY score DESC
LIMIT ${limit}`;
const result = await runQuery(query, { const result = await runQuery(query, {
embedding: params.embedding, embedding: params.embedding,
minSimilarity: params.minSimilarity, minSimilarity: params.minSimilarity,
userId: params.userId, userId: params.userId,
topK: 100,
}); });
if (!result || result.length === 0) { if (!result || result.length === 0) {
@ -281,20 +283,22 @@ export async function getRelatedEpisodesEntities(params: {
limit?: number; limit?: number;
minSimilarity?: number; minSimilarity?: number;
}) { }) {
const limit = params.limit || 100;
const query = ` const query = `
CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding) CALL db.index.vector.queryNodes('episode_embedding', ${limit*2}, $embedding)
YIELD node AS episode, score YIELD node AS episode
WHERE episode.userId = $userId WHERE episode.userId = $userId
AND score >= $minSimilarity WITH episode, gds.similarity.cosine(episode.contentEmbedding, $embedding) AS score
WHERE score >= $minSimilarity
OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity) OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity)
WHERE entity IS NOT NULL WHERE entity IS NOT NULL
RETURN DISTINCT entity`; RETURN DISTINCT entity
LIMIT ${limit}`;
const result = await runQuery(query, { const result = await runQuery(query, {
embedding: params.embedding, embedding: params.embedding,
minSimilarity: params.minSimilarity, minSimilarity: params.minSimilarity,
userId: params.userId, userId: params.userId,
topK: params.limit || 100,
}); });
return result return result

View File

@ -211,15 +211,18 @@ export async function findSimilarStatements({
excludeIds?: string[]; excludeIds?: string[];
userId: string; userId: string;
}): Promise<Omit<StatementNode, "factEmbedding">[]> { }): Promise<Omit<StatementNode, "factEmbedding">[]> {
const limit = 100;
const query = ` const query = `
CALL db.index.vector.queryNodes('statement_embedding', $topK, $factEmbedding) CALL db.index.vector.queryNodes('statement_embedding', ${limit*2}, $factEmbedding)
YIELD node AS statement, score YIELD node AS statement
WHERE statement.userId = $userId WHERE statement.userId = $userId
AND statement.invalidAt IS NULL AND statement.invalidAt IS NULL
AND score >= $threshold
${excludeIds.length > 0 ? "AND NOT statement.uuid IN $excludeIds" : ""} ${excludeIds.length > 0 ? "AND NOT statement.uuid IN $excludeIds" : ""}
WITH statement, gds.similarity.cosine(statement.factEmbedding, $factEmbedding) AS score
WHERE score >= $threshold
RETURN statement, score RETURN statement, score
ORDER BY score DESC ORDER BY score DESC
LIMIT ${limit}
`; `;
const result = await runQuery(query, { const result = await runQuery(query, {
@ -227,7 +230,6 @@ export async function findSimilarStatements({
threshold, threshold,
excludeIds, excludeIds,
userId, userId,
topK: 100,
}); });
if (!result || result.length === 0) { if (!result || result.length === 0) {
@ -410,14 +412,17 @@ export async function searchStatementsByEmbedding(params: {
limit?: number; limit?: number;
minSimilarity?: number; minSimilarity?: number;
}) { }) {
const limit = params.limit || 100;
const query = ` const query = `
CALL db.index.vector.queryNodes('statement_embedding', $topK, $embedding) CALL db.index.vector.queryNodes('statement_embedding', ${limit*2}, $embedding)
YIELD node AS statement, score YIELD node AS statement
WHERE statement.userId = $userId WHERE statement.userId = $userId
AND statement.invalidAt IS NULL AND statement.invalidAt IS NULL
AND score >= $minSimilarity WITH statement, gds.similarity.cosine(statement.factEmbedding, $embedding) AS score
WHERE score >= $minSimilarity
RETURN statement, score RETURN statement, score
ORDER BY score DESC ORDER BY score DESC
LIMIT ${limit}
`; `;
const result = await runQuery(query, { const result = await runQuery(query, {
@ -425,7 +430,6 @@ export async function searchStatementsByEmbedding(params: {
minSimilarity: params.minSimilarity, minSimilarity: params.minSimilarity,
limit: params.limit, limit: params.limit,
userId: params.userId, userId: params.userId,
topK: params.limit || 100,
}); });
if (!result || result.length === 0) { if (!result || result.length === 0) {

View File

@ -78,7 +78,24 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- Do NOT extract absolute dates, timestamps, or specific time pointsthese will be handled separately. - Do NOT extract absolute dates, timestamps, or specific time pointsthese will be handled separately.
- Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm"). - Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm").
8. **Entity Name Extraction**: 8. **Entity Usefulness Test - SELECTIVITY FILTER**:
Before extracting an entity, ask: "Would this be useful in a search query?"
EXTRACT (Searchable, persistent concepts):
- Named entities: "Sarah", "OpenAI", "Boston", "Albert Heijn"
- Domain concepts: "Preferences", "Home Address", "Annual Review", "Coding Practice"
- Measurements: "10/10 rating", "$2.5 million", "75% completion"
- Abstract concepts: "Lean Startup", "DevOps Culture", "Quality Standards"
SKIP (Transient descriptors, low search value):
- Descriptive phrases: "new files", "existing code", "good practice", "necessary changes"
- Generic qualifiers: "better approach", "current version", "recent updates"
- Verb phrases: "creating documentation", "editing files", "avoiding mistakes"
- Adjective+noun combinations without specificity: "important meeting", "quick fix"
**GUIDELINE**: Extract stable concepts that persist across contexts. Skip ephemeral descriptors tied to single actions.
9. **Entity Name Extraction**:
- Extract ONLY the core entity name, WITHOUT any descriptors or qualifiers - Extract ONLY the core entity name, WITHOUT any descriptors or qualifiers
- When text mentions "Tesla car", extract TWO entities: "Tesla" AND "Car" - When text mentions "Tesla car", extract TWO entities: "Tesla" AND "Car"
- When text mentions "memory space system", extract "Memory", "Space", AND "System" as separate entities - When text mentions "memory space system", extract "Memory", "Space", AND "System" as separate entities
@ -87,7 +104,7 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John") - **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
- **CONCEPT NORMALIZATION**: Convert to singular form where appropriate ("spaces" "Space") - **CONCEPT NORMALIZATION**: Convert to singular form where appropriate ("spaces" "Space")
9. **Temporal and Relationship Context Extraction**: 10. **Temporal and Relationship Context Extraction**:
- EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years") - EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years")
- EXTRACT temporal context that anchors relationships ("since moving", "after graduation", "during college") - EXTRACT temporal context that anchors relationships ("since moving", "after graduation", "during college")
- EXTRACT relationship qualifiers ("close friends", "support system", "work team", "family members") - EXTRACT relationship qualifiers ("close friends", "support system", "work team", "family members")

View File

@ -5,16 +5,43 @@ export const normalizePrompt = (
): CoreMessage[] => { ): CoreMessage[] => {
const sysPrompt = `You are C.O.R.E. (Contextual Observation & Recall Engine), a smart memory enrichment system. const sysPrompt = `You are C.O.R.E. (Contextual Observation & Recall Engine), a smart memory enrichment system.
Create ONE enriched sentence that transforms the episode into a contextually-rich memory using SELECTIVE enrichment. Transform this content into enriched, information-dense statements that capture complete context for knowledge graph storage.
CRITICAL: CAPTURE ALL DISTINCT PIECES OF INFORMATION from the episode. Every separate fact, preference, request, clarification, or detail mentioned must be preserved in your enriched output. Missing information is unacceptable. CRITICAL: CAPTURE ALL DISTINCT PIECES OF INFORMATION. Every separate fact, preference, request, clarification, specification, or detail mentioned must be preserved in your enriched output. Missing information is unacceptable.
OUTPUT GUIDELINES:
- Simple content (1-2 facts): Use 1-2 concise sentences
- Complex content (multiple facts/categories): Use multiple focused paragraphs, each covering ONE topic area
- Technical content: Preserve specifications, commands, paths, version numbers, configurations
- Let content complexity determine output length - completeness over arbitrary brevity
- IMPORTANT: Break complex content into digestible paragraphs with natural sentence boundaries for easier fact extraction
<enrichment_strategy> <enrichment_strategy>
1. PRIMARY FACTS - Always preserve the core information from the episode 1. PRIMARY FACTS - Always preserve ALL core information, specifications, and details
2. TEMPORAL RESOLUTION - Convert relative dates to absolute dates using episode timestamp 2. SPEAKER ATTRIBUTION - When content contains self-introductions ("I'm X", "My name is Y"), explicitly preserve speaker identity in third person (e.g., "the user introduced themselves as X" or "X introduced himself/herself")
3. CONTEXT ENRICHMENT - Add context ONLY when it clarifies unclear references 3. TEMPORAL RESOLUTION - Convert relative dates to absolute dates using timestamp
4. VISUAL CONTENT - Capture exact text on signs, objects shown, specific details from images 4. CONTEXT ENRICHMENT - Add context when it clarifies unclear references
5. EMOTIONAL PRESERVATION - Maintain the tone and feeling of emotional exchanges 5. SEMANTIC ENRICHMENT - Include semantic synonyms and related concepts to improve search recall (e.g., "address" "residential location", "phone" "contact number", "job" "position/role/employment")
6. ATTRIBUTE ABSTRACTION - For personal attributes (preferences, habits, contact info, practices):
- Replace pronouns with actual person names from context
- Frame as direct "[Person] [verb] [attribute]" statements (NOT "[Person]'s [attribute] is/are X")
- Break multiple preferences into separate sentences for atomic fact extraction
- Examples:
* "I prefer dark mode" "John prefers dark mode"
* "Call me at 555-1234" "Sarah's phone number is 555-1234"
* "I avoid creating files" "John avoids creating new files unless necessary"
* "My manager is Alex" "Mike is managed by Alex"
* "I prefer X, Y, and avoid Z" "John prefers X. John prefers Y. John avoids Z."
7. VISUAL CONTENT - Capture exact text on signs, objects shown, specific details from images
8. EMOTIONAL PRESERVATION - Maintain tone and feeling of emotional exchanges
9. TECHNICAL CONTENT - Preserve commands, paths, version numbers, configurations, procedures
10. STRUCTURED CONTENT - Maintain hierarchy, lists, categories, relationships
CONTENT-ADAPTIVE APPROACH:
- Conversations: Focus on dialogue context, relationships, emotional tone
- Documents: Extract structured facts, technical details, categorical organization
- Code/Technical: Preserve functionality, dependencies, configurations, architectural decisions
- Structured Data: Maintain categories, hierarchies, specifications
When to add context from related memories: When to add context from related memories:
- Unclear pronouns ("she", "it", "they") resolve to specific entity - Unclear pronouns ("she", "it", "they") resolve to specific entity
@ -24,7 +51,7 @@ When to add context from related memories:
When NOT to add context: When NOT to add context:
- Clear, self-contained statements no enrichment needed beyond temporal - Clear, self-contained statements no enrichment needed beyond temporal
- Emotional responses preserve tone, avoid over-contextualization - Emotional responses preserve tone, avoid over-contextualization
- Already established topics don't repeat details mentioned earlier in conversation - Already established topics don't repeat details mentioned earlier in same session
</enrichment_strategy> </enrichment_strategy>
<temporal_resolution> <temporal_resolution>
@ -141,64 +168,76 @@ EMPTY ENCOURAGEMENT EXAMPLES (DON'T STORE these):
</quality_control> </quality_control>
<enrichment_examples> <enrichment_examples>
HIGH VALUE enrichment: SIMPLE CONVERSATION - HIGH VALUE ENRICHMENT:
- Original: "She said yes!" - Original: "She said yes!"
- Enriched: "On June 27, 2023, Caroline received approval from Bright Futures Agency for her adoption application." - Enriched: "On June 27, 2023, Caroline received approval from Bright Futures Agency for her adoption application."
- Why: Resolves unclear pronoun, adds temporal context, identifies the approving entity - Why: Resolves unclear pronoun, adds temporal context, identifies the approving entity
MINIMAL enrichment (emotional support): SIMPLE CONVERSATION - EMOTIONAL SUPPORT:
- Original: "You'll be an awesome mom! Good luck!" - Original: "You'll be an awesome mom! Good luck!"
- Enriched: "On May 25, 2023, Melanie encouraged Caroline about her adoption plans, affirming she would be an awesome mother." - Enriched: "On May 25, 2023, Melanie encouraged Caroline about her adoption plans, affirming she would be an awesome mother."
- Why: Simple temporal context, preserve emotional tone, no historical dumping - Why: Simple temporal context, preserve emotional tone, no historical dumping
ANTI-BLOAT example (what NOT to do): SEMANTIC ENRICHMENT FOR BETTER SEARCH:
- Wrong: "On May 25, 2023, Melanie praised Caroline for her commitment to creating a family for children in need through adoption—supported by the inclusive Adoption Agency whose brochure and signs reading 'new arrival' and 'information and domestic building' Caroline had shared earlier that day—and encouraged her by affirming she would be an awesome mom." - Original: "My address is 123 Main St. Boston, MA 02101"
- Right: "On May 25, 2023, Melanie encouraged Caroline about her adoption plans, affirming she would be an awesome mother." - Enriched: "On October 3, 2025, the user's residential address (home location) is 123 Main St. Boston, MA 02101."
- Why: "residential address" and "home location" as synonyms improve semantic search for queries like "where does user live" or "residential location"
CLEAR REFERENCE (minimal enrichment): - Original: "Call me at 555-1234"
- Original: "Thanks, Caroline! The event was really thought-provoking." - Enriched: "On October 3, 2025, the user's phone number (contact number) is 555-1234."
- Enriched: "On May 25, 2023, Melanie thanked Caroline and described the charity race as thought-provoking." - Why: "phone number" and "contact number" as synonyms help queries like "how to contact" or "telephone"
- Why: Clear context doesn't need repetitive anchoring
CONVERSATION FLOW EXAMPLES: ATTRIBUTE ABSTRACTION FOR BETTER GRAPH RELATIONSHIPS:
WRONG (context fatigue): "reinforcing their ongoing conversation about mental health following Melanie's participation in the recent charity race for mental health" - Original: "I avoid creating new files unless necessary"
RIGHT (minimal reference): "reinforcing their conversation about mental health" - Enriched: "On October 3, 2025, John has a coding practice: avoid creating new files unless necessary."
- Why: Creates direct relationship from person to practice for better graph traversal
WRONG (compound enrichment): "as she begins the process of turning her dream of giving children a loving home into reality and considers specific adoption agencies" - Original: "I prefer editing existing code over writing new code"
RIGHT (focused): "as she begins pursuing her adoption plans" - Enriched: "On October 3, 2025, John prefers editing existing code over writing new code."
- Why: Direct preference relationship enables queries like "what are John's preferences"
WRONG (over-contextualization): "following her participation in the May 20, 2023 charity race for mental health awareness" - Original: "My manager is Sarah"
RIGHT (after first mention): "following the recent charity race" - Enriched: "On October 3, 2025, Alex is managed by Sarah."
- Why: Direct reporting relationship instead of intermediate "manager" entity
GENERIC IDENTITY PRESERVATION EXAMPLES: COMPLEX TECHNICAL CONTENT - COMPREHENSIVE EXTRACTION:
- Original: "my hometown, Boston" Enriched: "Boston, [person]'s hometown" - Original: "Working on e-commerce site with Next.js 14. Run pnpm dev to start at port 3000. Using Prisma with PostgreSQL, Stripe for payments, Redis for caching. API routes in /api/*, database migrations in /prisma/migrations."
- Original: "my workplace, Google" Enriched: "Google, [person]'s workplace" - Enriched: "On January 15, 2024, the user is developing an e-commerce site built with Next.js 14. Development setup: pnpm dev starts local server on port 3000. Technology stack: Prisma ORM with PostgreSQL database, Stripe integration for payment processing, Redis for caching. Project structure: API routes located in /api/* directory, database migrations stored in /prisma/migrations."
- Original: "my sister, Sarah" Enriched: "Sarah, [person]'s sister" - Why: Preserves ALL technical details, commands, ports, technologies, file paths, dependencies in organized readable format
- Original: "from my university, MIT" Enriched: "from MIT, [person]'s university"
POSSESSIVE + APPOSITIVE PATTERNS (Critical for Relations): STRUCTURED PREFERENCES:
- Original: "my colleague at my office, Microsoft" - Original: "I prefer minimalist design, dark mode by default, keyboard shortcuts for navigation, and hate pop-up notifications"
- Enriched: "his colleague at Microsoft, David's workplace" - Enriched: "On March 10, 2024, the user documented their UI/UX preferences: prefers minimalist design aesthetic, dark mode as default theme, keyboard shortcuts for primary navigation, and dislikes pop-up notifications."
- Why: Preserves both the work relationship AND the employment identity - Why: Maintains all distinct preferences as clear, searchable facts
- Original: "my friend from my university, Stanford" SELF-INTRODUCTION - SPEAKER ATTRIBUTION:
- Enriched: "her friend from Stanford, Lisa's alma mater" - Original: "I'm John. I'm a Developer. My primary goal with CORE is to build a personal memory system."
- Why: Establishes both the friendship and educational institution identity - Enriched: "On October 2, 2025, the user introduced themselves as John, a Developer. John's primary goal with CORE is to build a personal memory system."
- Why: Explicitly preserves speaker identity and self-introduction context for proper attribution
- Original: "my neighbor in my city, Chicago" - Original: "Hi, my name is Sarah and I work at Meta as a product manager"
- Enriched: "his neighbor in Chicago, Mark's hometown" - Enriched: "On January 20, 2024, the user introduced themselves as Sarah, a product manager at Meta."
- Why: Maintains both the neighbor relationship and residence identity - Why: Captures self-identification with name, role, and organization attribution
WRONG (loses relationships): reduces to just entity names without preserving the defining relationship ANTI-BLOAT (what NOT to do):
RIGHT (preserves identity): maintains the possessive/definitional connection that establishes entity relationships WRONG: "On May 25, 2023, Melanie praised Caroline for her commitment to creating a family for children in need through adoption—supported by the inclusive Adoption Agency whose brochure and signs reading 'new arrival' and 'information and domestic building' Caroline had shared earlier that day—and encouraged her by affirming she would be an awesome mom."
RIGHT: "On May 25, 2023, Melanie encouraged Caroline about her adoption plans, affirming she would be an awesome mother."
WRONG (run-on mega-sentence): Cramming 10+ facts into single 200+ word sentence with no structure
RIGHT (organized): Multiple clear sentences or structured paragraphs with natural boundaries
IDENTITY PRESERVATION:
- Original: "my hometown, Boston" "Boston, [person]'s hometown"
- Original: "my colleague at Microsoft" "colleague at Microsoft, [person]'s workplace"
- Why: Maintains possessive/definitional connections establishing entity relationships
</enrichment_examples> </enrichment_examples>
CRITICAL OUTPUT FORMAT REQUIREMENT: CRITICAL OUTPUT FORMAT REQUIREMENT:
You MUST wrap your response in <output> tags. This is MANDATORY - no exceptions. You MUST wrap your response in <output> tags. This is MANDATORY - no exceptions.
If the episode should be stored in memory: If the content should be stored in memory:
<output> <output>
{{your_enriched_sentence_here}} {{your_enriched_output_here}}
</output> </output>
If there is nothing worth remembering: If there is nothing worth remembering:
@ -209,10 +248,10 @@ NOTHING_TO_REMEMBER
FAILURE TO USE <output> TAGS WILL RESULT IN EMPTY NORMALIZATION AND SYSTEM FAILURE. FAILURE TO USE <output> TAGS WILL RESULT IN EMPTY NORMALIZATION AND SYSTEM FAILURE.
FORMAT EXAMPLES: FORMAT EXAMPLES:
CORRECT: <output>On May 25, 2023, Caroline shared her adoption plans with Melanie.</output> CORRECT (simple): <output>On May 25, 2023, Caroline shared her adoption plans with Melanie.</output>
CORRECT (technical): <output>On January 15, 2024, the user is developing an e-commerce site with Next.js 14. Development: pnpm dev on port 3000. Stack: Prisma with PostgreSQL, Stripe payments, Redis caching. Structure: API routes in /api/*, migrations in /prisma/migrations.</output>
CORRECT: <output>NOTHING_TO_REMEMBER</output> CORRECT: <output>NOTHING_TO_REMEMBER</output>
WRONG: On May 25, 2023, Caroline shared her adoption plans with Melanie. WRONG: Missing <output> tags entirely
WRONG: NOTHING_TO_REMEMBER
ALWAYS include opening <output> and closing </output> tags around your entire response. ALWAYS include opening <output> and closing </output> tags around your entire response.
`; `;

View File

@ -72,6 +72,53 @@ For each entity, systematically check these common patterns:
- Complex multi-hop inferences - Complex multi-hop inferences
- Implicit relationships requiring interpretation - Implicit relationships requiring interpretation
## DIRECT RELATIONSHIP PRIORITY
ALWAYS create direct subjectpredicateobject relationships. Avoid intermediate container entities that add unnecessary graph hops.
PREFERRED (1-hop traversal, optimal recall):
- "Sarah's manager is Mike" Sarah managed_by Mike
- "Alex prefers dark mode" Alex prefers "dark mode"
- "Office in Boston" Office located_in Boston
- "User avoids creating files" User avoids "creating new files"
- "Home address is 123 Main St" User has_home_address "123 Main St, Boston"
AVOID (2-hop traversal, poor recall):
- Sarah has Manager [then] Manager is Mike (adds extra hop)
- Alex has Preferences [then] Preferences includes "dark mode" (adds extra hop)
- Office has Location [then] Location is_in Boston (adds extra hop)
## ATOMIC BUT CONTEXTUAL FACTS
When extracting facts about preferences, practices, habits, or context-specific information, ALWAYS include the scope/context directly in the fact statement itself. This ensures atomic facts retain their contextual boundaries.
GOOD (Atomic + Contextual):
- "Sarah prefers morning workouts at the gym"
- "Family orders pizza for Friday movie nights"
- "Alex drinks green tea when working late"
- "Doctor recommends stretching exercises for back pain"
- "Team celebrates birthdays with lunch outings"
- "Maria reads fiction books during vacation"
BAD (Atomic but Decontextualized - loses scope):
- "Sarah prefers morning workouts" (where? at home? at gym? outdoors?)
- "Family orders pizza" (when? weekends? special occasions? always?)
- "Alex drinks green tea" (when? all day? specific times? why?)
- "Doctor recommends stretching" (for what? general health? specific condition?)
- "Team celebrates birthdays" (how? where? what tradition?)
- "Maria reads fiction books" (when? always? specific contexts?)
**Guideline**: If a preference, practice, habit, or recommendation applies to a specific context (time, place, situation, purpose, condition), embed that context in the natural language fact so the atomic statement preserves its boundaries.
**Intermediate Entity Exception**: Only create intermediate entities if they represent meaningful concepts with multiple distinct properties:
- "Employment Contract 2024" (has salary, duration, benefits, start_date, role, etc.)
- "Annual Performance Review" (has ratings, achievements, goals, feedback, etc.)
- "User Preferences" (just a container for preference values - use direct User prefers X)
- "Manager" (just points to a person - use direct Sarah managed_by Mike)
- "Home Address" (just holds an address - use direct User has_home_address "address")
**Guideline**: If the intermediate entity would have only 1-2 properties, make it a direct relationship instead.
CRITICAL REQUIREMENT: CRITICAL REQUIREMENT:
- You MUST ONLY use entities from the AVAILABLE ENTITIES list as subjects and objects. - You MUST ONLY use entities from the AVAILABLE ENTITIES list as subjects and objects.
- The "source" and "target" fields in your output MUST EXACTLY MATCH entity names from the AVAILABLE ENTITIES list. - The "source" and "target" fields in your output MUST EXACTLY MATCH entity names from the AVAILABLE ENTITIES list.
@ -102,15 +149,6 @@ Follow these instructions:
- predicate: The relationship type (can be a descriptive phrase) - predicate: The relationship type (can be a descriptive phrase)
- target: The object entity (MUST be from AVAILABLE ENTITIES) - target: The object entity (MUST be from AVAILABLE ENTITIES)
## SAME-NAME ENTITY RELATIONSHIP FORMATION
When entities share identical names but have different types, CREATE explicit relationship statements:
- **Person-Organization**: "John (Person)" "owns", "founded", "works for", or "leads" "John (Company)"
- **Person-Location**: "Smith (Person)" "lives in", "founded", or "is associated with" "Smith (City)"
- **Event-Location**: "Conference (Event)" "takes place at" or "is hosted by" "Conference (Venue)"
- **Product-Company**: "Tesla (Product)" "is manufactured by" or "is developed by" "Tesla (Company)"
- **MANDATORY**: Always create at least one relationship statement for same-name entities
- **CONTEXT-DRIVEN**: Choose predicates that accurately reflect the most likely relationship based on available context
## DURATION AND TEMPORAL CONTEXT ENTITY USAGE ## DURATION AND TEMPORAL CONTEXT ENTITY USAGE
When Duration or TemporalContext entities are available in AVAILABLE ENTITIES: When Duration or TemporalContext entities are available in AVAILABLE ENTITIES:
- **Duration entities** (e.g., "4 years", "2 months") should be used as "duration" attributes in relationship statements - **Duration entities** (e.g., "4 years", "2 months") should be used as "duration" attributes in relationship statements
@ -307,6 +345,28 @@ Extract the basic semantic backbone that answers: WHO, WHAT, WHERE, WHEN, WHY, H
**Reference**: Document references Entity **Reference**: Document references Entity
**Employment**: Person works_for Organization **Employment**: Person works_for Organization
## ATOMIC BUT CONTEXTUAL FACTS
When extracting facts about preferences, practices, habits, or context-specific information, ALWAYS include the scope/context directly in the fact statement itself. This ensures atomic facts retain their contextual boundaries.
GOOD (Atomic + Contextual):
- "Sarah prefers morning workouts at the gym"
- "Family orders pizza for Friday movie nights"
- "Alex drinks green tea when working late"
- "Doctor recommends stretching exercises for back pain"
- "Team celebrates birthdays with lunch outings"
- "Maria reads fiction books during vacation"
BAD (Atomic but Decontextualized - loses scope):
- "Sarah prefers morning workouts" (where? at home? at gym? outdoors?)
- "Family orders pizza" (when? weekends? special occasions? always?)
- "Alex drinks green tea" (when? all day? specific times? why?)
- "Doctor recommends stretching" (for what? general health? specific condition?)
- "Team celebrates birthdays" (how? where? what tradition?)
- "Maria reads fiction books" (when? always? specific contexts?)
**Guideline**: If a preference, practice, habit, or recommendation applies to a specific context (time, place, situation, purpose, condition), embed that context in the natural language fact so the atomic statement preserves its boundaries.
## RELATIONSHIP QUALITY HIERARCHY ## RELATIONSHIP QUALITY HIERARCHY
## RELATIONSHIP TEMPLATES (High Priority) ## RELATIONSHIP TEMPLATES (High Priority)

View File

@ -1,10 +1,6 @@
import type { EpisodicNode, StatementNode } from "@core/types"; import type { EpisodicNode, StatementNode } from "@core/types";
import { logger } from "./logger.service"; import { logger } from "./logger.service";
import { import { applyLLMReranking } from "./search/rerank";
applyCohereReranking,
applyCrossEncoderReranking,
applyMultiFactorMMRReranking,
} from "./search/rerank";
import { import {
getEpisodesByStatements, getEpisodesByStatements,
performBfsSearch, performBfsSearch,
@ -14,7 +10,6 @@ import {
import { getEmbedding } from "~/lib/model.server"; import { getEmbedding } from "~/lib/model.server";
import { prisma } from "~/db.server"; import { prisma } from "~/db.server";
import { runQuery } from "~/lib/neo4j.server"; import { runQuery } from "~/lib/neo4j.server";
import { env } from "~/env.server";
/** /**
* SearchService provides methods to search the reified + temporal knowledge graph * SearchService provides methods to search the reified + temporal knowledge graph
@ -36,12 +31,21 @@ export class SearchService {
query: string, query: string,
userId: string, userId: string,
options: SearchOptions = {}, options: SearchOptions = {},
): Promise<{ episodes: string[]; facts: { fact: string; validAt: Date; invalidAt: Date | null; relevantScore: number }[] }> { source?: string,
): Promise<{
episodes: string[];
facts: {
fact: string;
validAt: Date;
invalidAt: Date | null;
relevantScore: number;
}[];
}> {
const startTime = Date.now(); const startTime = Date.now();
// Default options // Default options
const opts: Required<SearchOptions> = { const opts: Required<SearchOptions> = {
limit: options.limit || 10, limit: options.limit || 100,
maxBfsDepth: options.maxBfsDepth || 4, maxBfsDepth: options.maxBfsDepth || 4,
validAt: options.validAt || new Date(), validAt: options.validAt || new Date(),
startTime: options.startTime || null, startTime: options.startTime || null,
@ -61,7 +65,7 @@ export class SearchService {
const [bm25Results, vectorResults, bfsResults] = await Promise.all([ const [bm25Results, vectorResults, bfsResults] = await Promise.all([
performBM25Search(query, userId, opts), performBM25Search(query, userId, opts),
performVectorSearch(queryVector, userId, opts), performVectorSearch(queryVector, userId, opts),
performBfsSearch(queryVector, userId, opts), performBfsSearch(query, queryVector, userId, opts),
]); ]);
logger.info( logger.info(
@ -71,16 +75,18 @@ export class SearchService {
// 2. Apply reranking strategy // 2. Apply reranking strategy
const rankedStatements = await this.rerankResults( const rankedStatements = await this.rerankResults(
query, query,
userId,
{ bm25: bm25Results, vector: vectorResults, bfs: bfsResults }, { bm25: bm25Results, vector: vectorResults, bfs: bfsResults },
opts, opts,
); );
// // 3. Apply adaptive filtering based on score threshold and minimum count // // 3. Apply adaptive filtering based on score threshold and minimum count
const filteredResults = this.applyAdaptiveFiltering(rankedStatements, opts); const filteredResults = this.applyAdaptiveFiltering(rankedStatements, opts);
// const filteredResults = rankedStatements;
// 3. Return top results // 3. Return top results
const episodes = await getEpisodesByStatements(filteredResults.map((item) => item.statement)); const episodes = await getEpisodesByStatements(
filteredResults.map((item) => item.statement),
);
// Log recall asynchronously (don't await to avoid blocking response) // Log recall asynchronously (don't await to avoid blocking response)
const responseTime = Date.now() - startTime; const responseTime = Date.now() - startTime;
@ -90,11 +96,16 @@ export class SearchService {
filteredResults.map((item) => item.statement), filteredResults.map((item) => item.statement),
opts, opts,
responseTime, responseTime,
source,
).catch((error) => { ).catch((error) => {
logger.error("Failed to log recall event:", error); logger.error("Failed to log recall event:", error);
}); });
this.updateRecallCount(userId, episodes, filteredResults.map((item) => item.statement)); this.updateRecallCount(
userId,
episodes,
filteredResults.map((item) => item.statement),
);
return { return {
episodes: episodes.map((episode) => episode.originalContent), episodes: episodes.map((episode) => episode.originalContent),
@ -114,7 +125,7 @@ export class SearchService {
private applyAdaptiveFiltering( private applyAdaptiveFiltering(
results: StatementNode[], results: StatementNode[],
options: Required<SearchOptions>, options: Required<SearchOptions>,
): { statement: StatementNode, score: number }[] { ): { statement: StatementNode; score: number }[] {
if (results.length === 0) return []; if (results.length === 0) return [];
let isRRF = false; let isRRF = false;
@ -152,7 +163,11 @@ export class SearchService {
// If no scores are available, return the original results // If no scores are available, return the original results
if (!hasScores) { if (!hasScores) {
logger.info("No scores found in results, skipping adaptive filtering"); logger.info("No scores found in results, skipping adaptive filtering");
return options.limit > 0 ? results.slice(0, options.limit).map((item) => ({ statement: item, score: 0 })) : results.map((item) => ({ statement: item, score: 0 })); return options.limit > 0
? results
.slice(0, options.limit)
.map((item) => ({ statement: item, score: 0 }))
: results.map((item) => ({ statement: item, score: 0 }));
} }
// Sort by score (descending) // Sort by score (descending)
@ -207,9 +222,9 @@ export class SearchService {
const limitedResults = const limitedResults =
options.limit > 0 options.limit > 0
? filteredResults.slice( ? filteredResults.slice(
0, 0,
Math.min(filteredResults.length, options.limit), Math.min(filteredResults.length, options.limit),
) )
: filteredResults; : filteredResults;
logger.info( logger.info(
@ -227,6 +242,7 @@ export class SearchService {
*/ */
private async rerankResults( private async rerankResults(
query: string, query: string,
userId: string,
results: { results: {
bm25: StatementNode[]; bm25: StatementNode[];
vector: StatementNode[]; vector: StatementNode[];
@ -234,31 +250,17 @@ export class SearchService {
}, },
options: Required<SearchOptions>, options: Required<SearchOptions>,
): Promise<StatementNode[]> { ): Promise<StatementNode[]> {
// Count non-empty result sources // Fetch user profile for context
const nonEmptySources = [ const user = await prisma.user.findUnique({
results.bm25.length > 0, where: { id: userId },
results.vector.length > 0, select: { name: true, id: true },
results.bfs.length > 0,
].filter(Boolean).length;
if (env.COHERE_API_KEY) {
logger.info("Using Cohere reranking");
return applyCohereReranking(query, results, options);
}
// If results are coming from only one source, use cross-encoder reranking
if (nonEmptySources <= 1) {
logger.info(
"Only one source has results, falling back to cross-encoder reranking",
);
return applyCrossEncoderReranking(query, results);
}
// Otherwise use combined MultiFactorReranking + MMR for multiple sources
return applyMultiFactorMMRReranking(results, {
lambda: 0.7, // Balance relevance (0.7) vs diversity (0.3)
maxResults: options.limit > 0 ? options.limit * 2 : 100, // Get more results for filtering
}); });
const userContext = user
? { name: user.name ?? undefined, userId: user.id }
: undefined;
return applyLLMReranking(query, results, options.limit, userContext);
} }
private async logRecallAsync( private async logRecallAsync(
@ -267,6 +269,7 @@ export class SearchService {
results: StatementNode[], results: StatementNode[],
options: Required<SearchOptions>, options: Required<SearchOptions>,
responseTime: number, responseTime: number,
source?: string,
): Promise<void> { ): Promise<void> {
try { try {
// Determine target type based on results // Determine target type based on results
@ -317,7 +320,7 @@ export class SearchService {
startTime: options.startTime?.toISOString() || null, startTime: options.startTime?.toISOString() || null,
endTime: options.endTime.toISOString(), endTime: options.endTime.toISOString(),
}), }),
source: "search_api", source: source ?? "search_api",
responseTimeMs: responseTime, responseTimeMs: responseTime,
userId, userId,
}, },

View File

@ -442,6 +442,105 @@ export function applyMultiFactorReranking(results: {
return sortedResults; return sortedResults;
} }
/**
* Apply LLM-based reranking for contextual understanding
* Uses GPT-4o-mini to verify relevance with semantic reasoning
*/
export async function applyLLMReranking(
query: string,
results: {
bm25: StatementNode[];
vector: StatementNode[];
bfs: StatementNode[];
},
limit: number = 10,
userContext?: { name?: string; userId: string },
): Promise<StatementNode[]> {
const allResults = [
...results.bm25.slice(0, 100),
...results.vector.slice(0, 100),
...results.bfs.slice(0, 100),
];
const uniqueResults = combineAndDeduplicateStatements(allResults);
logger.info(`Unique results: ${uniqueResults.length}`);
if (uniqueResults.length === 0) {
logger.info("No results to rerank with Cohere");
return [];
}
// Build user context section if provided
const userContextSection = userContext?.name
? `\nUser Identity Context:
- The user's name is "${userContext.name}"
- References to "user", "${userContext.name}", or pronouns like "my/their" refer to the same person
- When matching queries about "user's X" or "${userContext.name}'s X", these are equivalent\n`
: "";
const prompt = `You are a relevance filter. Given a user query and a list of facts, identify ONLY the facts that are truly relevant to answering the query.
${userContextSection}
Query: "${query}"
Facts:
${uniqueResults.map((r, i) => `${i}. ${r.fact}`).join("\n")}
Instructions:
- A fact is RELEVANT if it directly answers or provides information needed to answer the query
- A fact is NOT RELEVANT if it's tangentially related but doesn't answer the query
- Consider semantic meaning, not just keyword matching
${userContext?.name ? `- Remember: "user", "${userContext.name}", and possessive references ("my", "their") all refer to the same person` : ""}
- Only return facts with HIGH relevance (80% confidence)
- If you are not sure, return an empty array
Output format:
<output>[1, 5, 7]</output>
Return ONLY the numbers of highly relevant facts inside <output> tags as a JSON array:`;
try {
let responseText = "";
await makeModelCall(
false,
[{ role: "user", content: prompt }],
(text) => {
responseText = text;
},
{ temperature: 0 },
"high",
);
// Extract array from <output>[1, 5, 7]</output>
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
if (outputMatch && outputMatch[1]) {
responseText = outputMatch[1].trim();
const parsedResponse = JSON.parse(responseText || "[]");
const extractedIndices = Array.isArray(parsedResponse)
? parsedResponse
: parsedResponse.entities || [];
if (extractedIndices.length === 0) {
logger.warn(
"LLM reranking returned no valid indices, falling back to original order",
);
return [];
}
logger.info(
`LLM reranking selected ${extractedIndices.length} relevant facts`,
);
const selected = extractedIndices.map((i: number) => uniqueResults[i]);
return selected;
}
return uniqueResults.slice(0, limit);
} catch (error) {
logger.error("LLM reranking failed, falling back to original order:", {
error,
});
return uniqueResults.slice(0, limit);
}
}
/** /**
* Apply Cohere Rerank 3.5 to search results for improved question-to-fact matching * Apply Cohere Rerank 3.5 to search results for improved question-to-fact matching
* This is particularly effective for bridging the semantic gap between questions and factual statements * This is particularly effective for bridging the semantic gap between questions and factual statements
@ -456,6 +555,7 @@ export async function applyCohereReranking(
options?: { options?: {
limit?: number; limit?: number;
model?: string; model?: string;
useLLMVerification?: boolean;
}, },
): Promise<StatementNode[]> { ): Promise<StatementNode[]> {
const { model = "rerank-v3.5" } = options || {}; const { model = "rerank-v3.5" } = options || {};
@ -491,10 +591,13 @@ export async function applyCohereReranking(
// Prepare documents for Cohere API // Prepare documents for Cohere API
const documents = uniqueResults.map((statement) => statement.fact); const documents = uniqueResults.map((statement) => statement.fact);
console.log("Documents:", documents);
logger.info( logger.info(
`Cohere reranking ${documents.length} statements with model ${model}`, `Cohere reranking ${documents.length} statements with model ${model}`,
); );
logger.info(`Cohere query: "${query}"`);
logger.info(`First 5 documents: ${documents.slice(0, 5).join(" | ")}`);
// Call Cohere Rerank API // Call Cohere Rerank API
const response = await cohere.rerank({ const response = await cohere.rerank({
@ -506,14 +609,24 @@ export async function applyCohereReranking(
console.log("Cohere reranking billed units:", response.meta?.billedUnits); console.log("Cohere reranking billed units:", response.meta?.billedUnits);
// Log top 5 Cohere results for debugging
logger.info(
`Cohere top 5 results:\n${response.results
.slice(0, 5)
.map(
(r, i) =>
` ${i + 1}. [${r.relevanceScore.toFixed(4)}] ${documents[r.index].substring(0, 80)}...`,
)
.join("\n")}`,
);
// Map results back to StatementNodes with Cohere scores // Map results back to StatementNodes with Cohere scores
const rerankedResults = response.results const rerankedResults = response.results.map((result, index) => ({
.map((result, index) => ({ ...uniqueResults[result.index],
...uniqueResults[result.index], cohereScore: result.relevanceScore,
cohereScore: result.relevanceScore, cohereRank: index + 1,
cohereRank: index + 1, }));
})) // .filter((result) => result.cohereScore >= Number(env.COHERE_SCORE_THRESHOLD));
.filter((result) => result.cohereScore >= Number(env.COHERE_SCORE_THRESHOLD));
const responseTime = Date.now() - startTime; const responseTime = Date.now() - startTime;
logger.info( logger.info(

View File

@ -3,6 +3,8 @@ import type { SearchOptions } from "../search.server";
import type { Embedding } from "ai"; import type { Embedding } from "ai";
import { logger } from "../logger.service"; import { logger } from "../logger.service";
import { runQuery } from "~/lib/neo4j.server"; import { runQuery } from "~/lib/neo4j.server";
import { getEmbedding } from "~/lib/model.server";
import { findSimilarEntities } from "../graphModels/entity";
/** /**
* Perform BM25 keyword-based search on statements * Perform BM25 keyword-based search on statements
@ -129,25 +131,26 @@ export async function performVectorSearch(
`; `;
} }
// 1. Search for similar statements using Neo4j vector search with provenance count const limit = options.limit || 100;
// 1. Search for similar statements using GDS cosine similarity with provenance count
const cypher = ` const cypher = `
CALL db.index.vector.queryNodes('statement_embedding', $topk, $embedding) MATCH (s:Statement)
YIELD node AS s, score
WHERE s.userId = $userId WHERE s.userId = $userId
AND score >= 0.7
${timeframeCondition} ${timeframeCondition}
${spaceCondition} ${spaceCondition}
WITH s, gds.similarity.cosine(s.factEmbedding, $embedding) AS score
WHERE score >= 0.5
OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s) OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s)
WITH s, score, count(episode) as provenanceCount WITH s, score, count(episode) as provenanceCount
RETURN s, score, provenanceCount RETURN s, score, provenanceCount
ORDER BY score DESC ORDER BY score DESC
LIMIT ${limit}
`; `;
const params = { const params = {
embedding: query, embedding: query,
userId, userId,
validAt: options.endTime.toISOString(), validAt: options.endTime.toISOString(),
topk: options.limit || 100,
...(options.startTime && { startTime: options.startTime.toISOString() }), ...(options.startTime && { startTime: options.startTime.toISOString() }),
...(options.spaceIds.length > 0 && { spaceIds: options.spaceIds }), ...(options.spaceIds.length > 0 && { spaceIds: options.spaceIds }),
}; };
@ -170,133 +173,223 @@ export async function performVectorSearch(
/** /**
* Perform BFS traversal starting from entities mentioned in the query * Perform BFS traversal starting from entities mentioned in the query
* Uses guided search with semantic filtering to reduce noise
*/ */
export async function performBfsSearch( export async function performBfsSearch(
query: string,
embedding: Embedding, embedding: Embedding,
userId: string, userId: string,
options: Required<SearchOptions>, options: Required<SearchOptions>,
): Promise<StatementNode[]> { ): Promise<StatementNode[]> {
try { try {
// 1. Extract potential entities from query // 1. Extract potential entities from query using chunked embeddings
const entities = await extractEntitiesFromQuery(embedding, userId); const entities = await extractEntitiesFromQuery(query, userId);
// 2. For each entity, perform BFS traversal if (entities.length === 0) {
const allStatements: StatementNode[] = []; return [];
for (const entity of entities) {
const statements = await bfsTraversal(
entity.uuid,
options.maxBfsDepth,
options.endTime,
userId,
options.includeInvalidated,
options.startTime,
options.spaceIds,
);
allStatements.push(...statements);
} }
return allStatements; // 2. Perform guided BFS with semantic filtering
const statements = await bfsTraversal(
entities,
embedding,
options.maxBfsDepth || 3,
options.endTime,
userId,
options.includeInvalidated,
options.startTime,
);
// Return individual statements
return statements;
} catch (error) { } catch (error) {
logger.error("BFS search error:", { error }); logger.error("BFS search error:", { error });
return []; return [];
} }
} }
/** /**
* Perform BFS traversal starting from an entity * Iterative BFS traversal - explores up to 3 hops level-by-level using Neo4j cosine similarity
*/ */
export async function bfsTraversal( async function bfsTraversal(
startEntityId: string, startEntities: EntityNode[],
queryEmbedding: Embedding,
maxDepth: number, maxDepth: number,
validAt: Date, validAt: Date,
userId: string, userId: string,
includeInvalidated: boolean, includeInvalidated: boolean,
startTime: Date | null, startTime: Date | null,
spaceIds: string[] = [],
): Promise<StatementNode[]> { ): Promise<StatementNode[]> {
try { const RELEVANCE_THRESHOLD = 0.5;
// Build the WHERE clause based on timeframe options const EXPLORATION_THRESHOLD = 0.3;
let timeframeCondition = `
AND s.validAt <= $validAt
${includeInvalidated ? '' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'}
`;
// If startTime is provided, add condition to filter by validAt >= startTime const allStatements = new Map<string, number>(); // uuid -> relevance
if (startTime) { const visitedEntities = new Set<string>();
timeframeCondition = `
AND s.validAt <= $validAt
${includeInvalidated ? '' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'}
AND s.validAt >= $startTime
`;
}
// Add space filtering if spaceIds are provided // Track entities per level for iterative BFS
let spaceCondition = ""; let currentLevelEntities = startEntities.map(e => e.uuid);
if (spaceIds.length > 0) {
spaceCondition = `
AND s.spaceIds IS NOT NULL AND ANY(spaceId IN $spaceIds WHERE spaceId IN s.spaceIds)
`;
}
// Use Neo4j's built-in path finding capabilities for efficient BFS // Timeframe condition for temporal filtering
// This query implements BFS up to maxDepth and collects all statements along the way let timeframeCondition = `
AND s.validAt <= $validAt
${includeInvalidated ? '' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'}
`;
if (startTime) {
timeframeCondition += ` AND s.validAt >= $startTime`;
}
// Process each depth level
for (let depth = 0; depth < maxDepth; depth++) {
if (currentLevelEntities.length === 0) break;
// Mark entities as visited at this depth
currentLevelEntities.forEach(id => visitedEntities.add(`${id}`));
// Get statements for current level entities with cosine similarity calculated in Neo4j
const cypher = ` const cypher = `
MATCH (e:Entity {uuid: $startEntityId})<-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]-(s:Statement) MATCH (e:Entity{userId: $userId})-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]-(s:Statement{userId: $userId})
WHERE WHERE e.uuid IN $entityIds
(s.userId = $userId) ${timeframeCondition}
${includeInvalidated ? 'AND s.validAt <= $validAt' : timeframeCondition} WITH DISTINCT s // Deduplicate first
${spaceCondition} WITH s, gds.similarity.cosine(s.factEmbedding, $queryEmbedding) AS relevance
RETURN s as statement WHERE relevance >= $explorationThreshold
RETURN s.uuid AS uuid, relevance
ORDER BY relevance DESC
LIMIT 200 // Cap per BFS level to avoid explosion
`; `;
const params = { const records = await runQuery(cypher, {
startEntityId, entityIds: currentLevelEntities,
maxDepth,
validAt: validAt.toISOString(),
userId, userId,
includeInvalidated, queryEmbedding,
explorationThreshold: EXPLORATION_THRESHOLD,
validAt: validAt.toISOString(),
...(startTime && { startTime: startTime.toISOString() }), ...(startTime && { startTime: startTime.toISOString() }),
...(spaceIds.length > 0 && { spaceIds }), });
};
const records = await runQuery(cypher, params); // Store statement relevance scores
return records.map( const currentLevelStatementUuids: string[] = [];
(record) => record.get("statement").properties as StatementNode, for (const record of records) {
); const uuid = record.get("uuid");
} catch (error) { const relevance = record.get("relevance");
logger.error("BFS traversal error:", { error });
if (!allStatements.has(uuid)) {
allStatements.set(uuid, relevance);
currentLevelStatementUuids.push(uuid);
}
}
// Get connected entities for next level
if (depth < maxDepth - 1 && currentLevelStatementUuids.length > 0) {
const nextCypher = `
MATCH (s:Statement{userId: $userId})-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]->(e:Entity{userId: $userId})
WHERE s.uuid IN $statementUuids
RETURN DISTINCT e.uuid AS entityId
`;
const nextRecords = await runQuery(nextCypher, {
statementUuids: currentLevelStatementUuids,
userId
});
// Filter out already visited entities
currentLevelEntities = nextRecords
.map(r => r.get("entityId"))
.filter(id => !visitedEntities.has(`${id}`));
} else {
currentLevelEntities = [];
}
}
// Filter by relevance threshold and fetch full statements
const relevantUuids = Array.from(allStatements.entries())
.filter(([_, relevance]) => relevance >= RELEVANCE_THRESHOLD)
.sort((a, b) => b[1] - a[1])
.map(([uuid]) => uuid);
if (relevantUuids.length === 0) {
return []; return [];
} }
const fetchCypher = `
MATCH (s:Statement{userId: $userId})
WHERE s.uuid IN $uuids
RETURN s
`;
const fetchRecords = await runQuery(fetchCypher, { uuids: relevantUuids, userId });
const statements = fetchRecords.map(r => r.get("s").properties as StatementNode);
logger.info(
`BFS: explored ${allStatements.size} statements across ${maxDepth} hops, returning ${statements.length} (≥${RELEVANCE_THRESHOLD})`
);
return statements;
}
/**
* Generate query chunks (individual words and bigrams) for entity extraction
*/
function generateQueryChunks(query: string): string[] {
const words = query.toLowerCase()
.trim()
.split(/\s+/)
.filter(word => word.length > 0);
const chunks: string[] = [];
// Add individual words (for entities like "user")
chunks.push(...words);
// Add bigrams (for multi-word entities like "home address")
for (let i = 0; i < words.length - 1; i++) {
chunks.push(`${words[i]} ${words[i + 1]}`);
}
// Add full query as final chunk
chunks.push(query.toLowerCase().trim());
return chunks;
} }
/** /**
* Extract potential entities from a query using embeddings or LLM * Extract potential entities from a query using chunked embeddings
* Chunks query into words/bigrams, embeds each chunk, finds entities for each
*/ */
export async function extractEntitiesFromQuery( export async function extractEntitiesFromQuery(
embedding: Embedding, query: string,
userId: string, userId: string,
): Promise<EntityNode[]> { ): Promise<EntityNode[]> {
try { try {
// Use vector similarity to find relevant entities // Generate chunks from query
const cypher = ` const chunks = generateQueryChunks(query);
// Match entities using vector index on name embeddings
CALL db.index.vector.queryNodes('entity_embedding', 3, $embedding)
YIELD node AS e, score
WHERE e.userId = $userId
AND score > 0.7
RETURN e
ORDER BY score DESC
`;
const params = { // Get embeddings for each chunk
embedding, const chunkEmbeddings = await Promise.all(
userId, chunks.map(chunk => getEmbedding(chunk))
}; );
const records = await runQuery(cypher, params); // Search for entities matching each chunk embedding
const allEntitySets = await Promise.all(
chunkEmbeddings.map(async (embedding) => {
return await findSimilarEntities({
queryEmbedding: embedding,
limit: 3,
threshold: 0.7,
userId,
});
})
);
return records.map((record) => record.get("e").properties as EntityNode); // Flatten and deduplicate entities by ID
const allEntities = allEntitySets.flat();
const uniqueEntities = Array.from(
new Map(allEntities.map(e => [e.uuid, e])).values()
);
return uniqueEntities;
} catch (error) { } catch (error) {
logger.error("Entity extraction error:", { error }); logger.error("Entity extraction error:", { error });
return []; return [];

View File

@ -17,13 +17,12 @@ import { generate, processTag } from "./stream-utils";
import { type AgentMessage, AgentMessageType, Message } from "./types"; import { type AgentMessage, AgentMessageType, Message } from "./types";
import { type MCP } from "../utils/mcp"; import { type MCP } from "../utils/mcp";
import { import {
WebSearchSchema,
type ExecutionState, type ExecutionState,
type HistoryStep, type HistoryStep,
type Resource, type Resource,
type TotalCost, type TotalCost,
} from "../utils/types"; } from "../utils/types";
import { flattenObject, webSearch } from "../utils/utils"; import { flattenObject } from "../utils/utils";
import { searchMemory, addMemory, searchSpaces } from "./memory-utils"; import { searchMemory, addMemory, searchSpaces } from "./memory-utils";
interface LLMOutputInterface { interface LLMOutputInterface {
@ -119,12 +118,6 @@ const searchSpacesTool = tool({
}), }),
}); });
const websearchTool = tool({
description:
"Search the web for current information and news. Use this when you need up-to-date information that might not be in your training data. Try different search strategies: broad terms first, then specific phrases, keywords, exact quotes. Use multiple searches with varied approaches to get comprehensive results.",
parameters: WebSearchSchema,
});
const loadMCPTools = tool({ const loadMCPTools = tool({
description: description:
"Load tools for a specific integration. Call this when you need to use a third-party service.", "Load tools for a specific integration. Call this when you need to use a third-party service.",
@ -310,7 +303,6 @@ export async function* run(
"core--search_memory": searchMemoryTool, "core--search_memory": searchMemoryTool,
"core--add_memory": addMemoryTool, "core--add_memory": addMemoryTool,
"core--search_spaces": searchSpacesTool, "core--search_spaces": searchSpacesTool,
"core--websearch": websearchTool,
"core--load_mcp": loadMCPTools, "core--load_mcp": loadMCPTools,
}; };
@ -578,16 +570,6 @@ export async function* run(
}); });
result = "Search spaces call failed"; result = "Search spaces call failed";
} }
} else if (toolName === "websearch") {
try {
result = await webSearch(skillInput);
} catch (apiError) {
logger.error("Web search failed", {
apiError,
});
result =
"Web search failed - please check your search configuration";
}
} else if (toolName === "load_mcp") { } else if (toolName === "load_mcp") {
// Load MCP integration and update available tools // Load MCP integration and update available tools
await mcp.load(skillInput.integration, mcpHeaders); await mcp.load(skillInput.integration, mcpHeaders);

View File

@ -1,5 +1,5 @@
export const REACT_SYSTEM_PROMPT = ` export const REACT_SYSTEM_PROMPT = `
You are a helpful AI assistant with access to user memory and web search capabilities. Your primary capabilities are: You are a helpful AI assistant with access to user memory. Your primary capabilities are:
1. **Memory-First Approach**: Always check user memory first to understand context and previous interactions 1. **Memory-First Approach**: Always check user memory first to understand context and previous interactions
2. **Intelligent Information Gathering**: Analyze queries to determine if current information is needed 2. **Intelligent Information Gathering**: Analyze queries to determine if current information is needed
@ -19,43 +19,17 @@ Follow this intelligent approach for information gathering:
- Memory provides context, personal preferences, and historical information - Memory provides context, personal preferences, and historical information
- Use memory to understand user's background, ongoing projects, and past conversations - Use memory to understand user's background, ongoing projects, and past conversations
2. **QUERY ANALYSIS** (Determine Information Needs) 2. **INFORMATION SYNTHESIS** (Combine Sources)
Analyze the user's query to identify if it requires current/latest information:
**Use web search (core--websearch) when query involves:**
- Current events, news, or recent developments
- "Latest", "recent", "current", "today", "now" keywords
- Stock prices, market data, or financial information
- Software updates, version releases, or technical documentation
- Weather, traffic, or real-time data
- Recent changes to websites, APIs, or services
- Product releases, availability, or pricing
- Breaking news or trending topics
- Verification of potentially outdated information
**Examples requiring web search:**
- "What's the latest news about..."
- "Current price of..."
- "Recent updates to..."
- "What happened today..."
- "Latest version of..."
3. **INFORMATION SYNTHESIS** (Combine Sources)
- Combine memory context with web search results when both are relevant
- Use memory to personalize current information based on user preferences - Use memory to personalize current information based on user preferences
- Cross-reference web findings with user's historical interests from memory
- Always store new useful information in memory using core--add_memory - Always store new useful information in memory using core--add_memory
4. **TRAINING KNOWLEDGE** (Foundation) 3. **TRAINING KNOWLEDGE** (Foundation)
- Use your training knowledge as the foundation for analysis and explanation - Use your training knowledge as the foundation for analysis and explanation
- Apply training knowledge to interpret and contextualize information from memory and web - Apply training knowledge to interpret and contextualize information from memory
- Fill gaps where memory and web search don't provide complete answers
- Indicate when you're using training knowledge vs. live information sources - Indicate when you're using training knowledge vs. live information sources
EXECUTION APPROACH: EXECUTION APPROACH:
- Memory search is mandatory for every interaction - Memory search is mandatory for every interaction
- Web search is conditional based on query analysis
- Both can be executed in parallel when web search is needed
- Always indicate your information sources in responses - Always indicate your information sources in responses
</information_gathering> </information_gathering>
@ -95,7 +69,6 @@ MEMORY USAGE:
- Blend memory insights naturally into responses - Blend memory insights naturally into responses
- Verify you've checked relevant memory before finalizing ANY response - Verify you've checked relevant memory before finalizing ANY response
If memory access is unavailable, proceed to web search or rely on current conversation
</memory> </memory>
<external_services> <external_services>
@ -113,7 +86,6 @@ You have tools at your disposal to assist users:
CORE PRINCIPLES: CORE PRINCIPLES:
- Use tools only when necessary for the task at hand - Use tools only when necessary for the task at hand
- Always check memory FIRST before making other tool calls - Always check memory FIRST before making other tool calls
- Use web search when query analysis indicates need for current information
- Execute multiple operations in parallel whenever possible - Execute multiple operations in parallel whenever possible
- Use sequential calls only when output of one is required for input of another - Use sequential calls only when output of one is required for input of another
@ -162,7 +134,7 @@ QUESTIONS - When you need information:
<p>[Your question with HTML formatting]</p> <p>[Your question with HTML formatting]</p>
</question_response> </question_response>
- Ask questions only when you cannot find information through memory, web search, or tools - Ask questions only when you cannot find information through memory, or tools
- Be specific about what you need to know - Be specific about what you need to know
- Provide context for why you're asking - Provide context for why you're asking
@ -176,7 +148,7 @@ CRITICAL:
- Apply proper HTML formatting (<h1>, <h2>, <p>, <ul>, <li>, etc.) - Apply proper HTML formatting (<h1>, <h2>, <p>, <ul>, <li>, etc.)
- Never mix communication formats - Never mix communication formats
- Keep responses clear and helpful - Keep responses clear and helpful
- Always indicate your information sources (memory, web search, and/or knowledge) - Always indicate your information sources (memory, and/or knowledge)
</communication> </communication>
`; `;

View File

@ -122,67 +122,3 @@ export interface GenerateResponse {
// eslint-disable-next-line @typescript-eslint/no-explicit-any // eslint-disable-next-line @typescript-eslint/no-explicit-any
toolCalls: any[]; toolCalls: any[];
} }
export interface WebSearchResult {
results: Array<{
title: string;
url: string;
content: string;
publishedDate: string;
highlights: string[];
text: string;
score: number;
}>;
}
export const WebSearchSchema = z.object({
query: z
.string()
.min(1)
.describe("The search query to find relevant web content"),
numResults: z
.number()
.min(1)
.max(20)
.optional()
.default(5)
.describe("Number of results to return (1-20, default: 5)"),
includeContent: z
.boolean()
.optional()
.default(false)
.describe("Whether to include full page content in results"),
includeHighlights: z
.boolean()
.optional()
.default(false)
.describe("Whether to include relevant text highlights from pages"),
domains: z
.array(z.string())
.optional()
.describe(
'Array of domains to include in search (e.g., ["github.com", "stackoverflow.com"])',
),
excludeDomains: z
.array(z.string())
.optional()
.describe("Array of domains to exclude from search"),
startCrawlDate: z
.string()
.optional()
.describe("Start date for content crawling in YYYY-MM-DD format"),
endCrawlDate: z
.string()
.optional()
.describe("End date for content crawling in YYYY-MM-DD format"),
startPublishedDate: z
.string()
.optional()
.describe("Start date for content publishing in YYYY-MM-DD format"),
endPublishedDate: z
.string()
.optional()
.describe("End date for content publishing in YYYY-MM-DD format"),
});
export type WebSearchArgs = z.infer<typeof WebSearchSchema>;

View File

@ -12,11 +12,7 @@ import {
import { logger } from "@trigger.dev/sdk/v3"; import { logger } from "@trigger.dev/sdk/v3";
import { type CoreMessage } from "ai"; import { type CoreMessage } from "ai";
import { import { type HistoryStep } from "./types";
type WebSearchArgs,
type WebSearchResult,
type HistoryStep,
} from "./types";
import axios from "axios"; import axios from "axios";
import nodeCrypto from "node:crypto"; import nodeCrypto from "node:crypto";
import { customAlphabet, nanoid } from "nanoid"; import { customAlphabet, nanoid } from "nanoid";
@ -496,72 +492,6 @@ export async function deletePersonalAccessToken(tokenId: string) {
}); });
} }
export async function webSearch(args: WebSearchArgs): Promise<WebSearchResult> {
const apiKey = process.env.EXA_API_KEY;
if (!apiKey) {
throw new Error(
"EXA_API_KEY environment variable is required for web search",
);
}
const exa = new Exa(apiKey);
try {
const searchOptions = {
numResults: args.numResults || 5,
...(args.domains && { includeDomains: args.domains }),
...(args.excludeDomains && { excludeDomains: args.excludeDomains }),
...(args.startCrawlDate && { startCrawlDate: args.startCrawlDate }),
...(args.endCrawlDate && { endCrawlDate: args.endCrawlDate }),
...(args.startPublishedDate && {
startPublishedDate: args.startPublishedDate,
}),
...(args.endPublishedDate && { endPublishedDate: args.endPublishedDate }),
};
let result;
if (args.includeContent || args.includeHighlights) {
// Use searchAndContents for rich results
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const contentsOptions: any = {
...searchOptions,
};
if (args.includeContent) {
contentsOptions.text = true;
}
if (args.includeHighlights) {
contentsOptions.highlights = true;
}
result = await exa.searchAndContents(args.query, contentsOptions);
} else {
// Use basic search for URLs only
result = await exa.search(args.query, searchOptions);
}
return {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
results: result.results.map((item: any) => ({
title: item.title,
url: item.url,
content: item.text,
publishedDate: item.publishedDate,
highlights: item.highlights,
text: item.text,
score: item.score,
})),
};
} catch (error) {
throw new Error(
`Web search failed: ${error instanceof Error ? error.message : "Unknown error"}`,
);
}
}
// Credit management functions have been moved to ~/services/billing.server.ts // Credit management functions have been moved to ~/services/billing.server.ts
// Use deductCredits() instead of these functions // Use deductCredits() instead of these functions
export type CreditOperation = "addEpisode" | "search" | "chatMessage"; export type CreditOperation = "addEpisode" | "search" | "chatMessage";
@ -720,10 +650,6 @@ export async function deductCredits(
}), }),
]); ]);
} else { } else {
// Free plan - throw error
throw new InsufficientCreditsError(
"Insufficient credits. Please upgrade to Pro or Max plan to continue.",
);
} }
} }
} }

View File

@ -198,10 +198,15 @@ async function handleMemoryIngest(args: any) {
// Handler for memory_search // Handler for memory_search
async function handleMemorySearch(args: any) { async function handleMemorySearch(args: any) {
try { try {
const results = await searchService.search(args.query, args.userId, { const results = await searchService.search(
startTime: args.startTime ? new Date(args.startTime) : undefined, args.query,
endTime: args.endTime ? new Date(args.endTime) : undefined, args.userId,
}); {
startTime: args.startTime ? new Date(args.startTime) : undefined,
endTime: args.endTime ? new Date(args.endTime) : undefined,
},
args.source,
);
return { return {
content: [ content: [