Fix: Semantic Search issue (#89)

* Fix: normalization prompt

* Fix: improve knowledge graph construction and recall

* fix: add user context to search reranking

* fix: log the source in search

* fix: remove hardcoded limit

---------

Co-authored-by: Harshith Mullapudi <harshithmullapudi@gmail.com>
Manoj 2025-10-06 14:06:52 +05:30 committed by GitHub
parent 3d1b93d97d
commit 27f8740691
17 changed files with 580 additions and 734 deletions

View File

@ -83,8 +83,9 @@ export async function makeModelCall(
const generateTextOptions: any = {}
console.log('complexity:', complexity, 'model:', model)
logger.info(
`complexity: ${complexity}, model: ${model}`,
);
switch (model) {
case "gpt-4.1-2025-04-14":
case "gpt-4.1-mini-2025-04-14":

View File

@ -1,315 +0,0 @@
import { json } from "@remix-run/node";
import { randomUUID } from "node:crypto";
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
import { z } from "zod";
import { createHybridActionApiRoute } from "~/services/routeBuilders/apiBuilder.server";
import { addToQueue } from "~/lib/ingest.server";
import { SearchService } from "~/services/search.server";
import { handleTransport } from "~/utils/mcp";
import { SpaceService } from "~/services/space.server";
import { EpisodeTypeEnum } from "@core/types";
// Map to store transports by session ID with cleanup tracking
const transports: {
[sessionId: string]: {
transport: StreamableHTTPServerTransport;
createdAt: number;
};
} = {};
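// createdAt is recorded with each transport so stale sessions can be swept by
// age; explicit cleanup also happens via transport.onclose and the DELETE handler below.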
// MCP request body schema
const MCPRequestSchema = z.object({}).passthrough();
const SourceParams = z.object({
source: z.string().optional(),
});
// Search parameters schema for MCP tool
const SearchParamsSchema = z.object({
query: z.string().describe("The search query in third person perspective"),
validAt: z.string().optional().describe("The valid at time in ISO format"),
startTime: z.string().optional().describe("The start time in ISO format"),
endTime: z.string().optional().describe("The end time in ISO format"),
spaceIds: z
.array(z.string())
.optional()
.describe("Array of strings representing UUIDs of spaces"),
});
const IngestSchema = z.object({
message: z.string().describe("The data to ingest in text format"),
});
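// Hypothetical payloads that satisfy these schemas (all values illustrative):
// search arguments:
//   { "query": "John's home address", "validAt": "2025-10-03T00:00:00Z", "spaceIds": ["c0ffee00-0000-4000-8000-000000000001"] }
// ingest arguments:
//   { "message": "John moved to 123 Main St, Boston" }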
const searchService = new SearchService();
const spaceService = new SpaceService();
// Handle MCP HTTP requests properly
const handleMCPRequest = async (
request: Request,
body: any,
authentication: any,
params: z.infer<typeof SourceParams>,
) => {
const sessionId = request.headers.get("mcp-session-id") as string | undefined;
const source =
(request.headers.get("source") as string | undefined) ??
(params.source as string | undefined);
if (!source) {
return json(
{
jsonrpc: "2.0",
error: {
code: -32601,
message: "No source found",
},
id: null,
},
{ status: 400 },
);
}
let transport: StreamableHTTPServerTransport;
try {
if (sessionId && transports[sessionId]) {
// Reuse existing transport
transport = transports[sessionId].transport;
} else if (!sessionId && isInitializeRequest(body)) {
// New initialization request
transport = new StreamableHTTPServerTransport({
sessionIdGenerator: () => randomUUID(),
onsessioninitialized: (sessionId) => {
// Store the transport by session ID with timestamp
transports[sessionId] = {
transport,
createdAt: Date.now(),
};
},
});
// Clean up transport when closed
transport.onclose = () => {
if (transport.sessionId) {
delete transports[transport.sessionId];
}
};
const server = new McpServer(
{
name: "echo-memory-server",
version: "1.0.0",
},
{
capabilities: {
tools: {},
},
},
);
// Register ingest tool
server.registerTool(
"ingest",
{
title: "Ingest Data",
description: "Ingest data into the memory system",
inputSchema: IngestSchema.shape,
},
async (args) => {
try {
const userId = authentication.userId;
const response = addToQueue(
{
episodeBody: args.message,
referenceTime: new Date().toISOString(),
source,
type: EpisodeTypeEnum.CONVERSATION,
},
userId,
);
return {
content: [
{
type: "text",
text: JSON.stringify(response),
},
],
};
} catch (error) {
console.error("MCP ingest error:", error);
return {
content: [
{
type: "text",
text: `Error ingesting data: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
},
);
// Register search tool
server.registerTool(
"search",
{
title: "Search Data",
description: "Search through ingested data",
inputSchema: SearchParamsSchema.shape,
},
async (args) => {
try {
const userId = authentication.userId;
const results = await searchService.search(args.query, userId, {
startTime: args.startTime ? new Date(args.startTime) : undefined,
endTime: args.endTime ? new Date(args.endTime) : undefined,
});
return {
content: [
{
type: "text",
text: JSON.stringify(results),
},
],
};
} catch (error) {
console.error("MCP search error:", error);
return {
content: [
{
type: "text",
text: `Error searching: ${error instanceof Error ? error.message : String(error)}`,
},
],
isError: true,
};
}
},
);
// Register search tool
server.registerTool(
"get_spaces",
{
title: "Get spaces",
description: "Get spaces in memory",
},
async () => {
try {
const userId = authentication.userId;
const spaces = await spaceService.getUserSpaces(userId);
return {
content: [
{
type: "text",
text: JSON.stringify(spaces),
},
],
isError: false,
};
} catch (error) {
console.error("Spaces error:", error);
return {
content: [
{
type: "text",
text: `Error getting spaces`,
},
],
isError: true,
};
}
},
);
// Connect to the MCP server
await server.connect(transport);
} else {
// Invalid request
throw new Error("Bad Request: No valid session ID provided");
}
const response = await handleTransport(transport, request, body);
return response;
} catch (error) {
console.error("MCP request error:", error);
return json(
{
jsonrpc: "2.0",
error: {
code: -32000,
message:
error instanceof Error ? error.message : "Internal server error",
},
id: body?.id || null,
},
{ status: 500 },
);
}
};
// Handle DELETE requests for session cleanup
const handleDelete = async (request: Request, authentication: any) => {
const sessionId = request.headers.get("mcp-session-id") as string | undefined;
if (!sessionId || !transports[sessionId]) {
return new Response("Invalid or missing session ID", { status: 400 });
}
const transport = transports[sessionId].transport;
// Clean up transport
transport.close();
delete transports[sessionId];
return new Response(null, { status: 204 });
};
const { action, loader } = createHybridActionApiRoute(
{
body: MCPRequestSchema,
searchParams: SourceParams,
allowJWT: true,
authorization: {
action: "mcp",
},
corsStrategy: "all",
},
async ({ body, authentication, request, searchParams }) => {
const method = request.method;
if (method === "POST") {
return await handleMCPRequest(
request,
body,
authentication,
searchParams,
);
} else if (method === "DELETE") {
return await handleDelete(request, authentication);
} else {
return json(
{
jsonrpc: "2.0",
error: {
code: -32601,
message: "Method not allowed",
},
id: null,
},
{ status: 405 },
);
}
},
);
export { action, loader };
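For reference, the session lifecycle this now-removed route implemented, seen from the client side; a minimal fetch-based sketch in which the endpoint path and the source value are assumptions:
// 1. Initialize: no mcp-session-id header yet; the server mints one.
const init = await fetch("/mcp?source=docs", {
  method: "POST",
  headers: { "content-type": "application/json" },
  body: JSON.stringify({
    jsonrpc: "2.0",
    id: 1,
    method: "initialize",
    params: {
      protocolVersion: "2024-11-05",
      capabilities: {},
      clientInfo: { name: "example-client", version: "0.0.0" },
    },
  }),
});
const sessionId = init.headers.get("mcp-session-id");
// 2. Tool calls reuse the stored transport via the session header.
await fetch("/mcp?source=docs", {
  method: "POST",
  headers: {
    "content-type": "application/json",
    "mcp-session-id": sessionId ?? "",
  },
  body: JSON.stringify({
    jsonrpc: "2.0",
    id: 2,
    method: "tools/call",
    params: { name: "search", arguments: { query: "John's home address" } },
  }),
});
// 3. DELETE closes the transport and frees the session entry.
await fetch("/mcp", {
  method: "DELETE",
  headers: { "mcp-session-id": sessionId ?? "" },
});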

View File

@ -278,7 +278,7 @@ export default function BillingSettings() {
</div>
<div className="space-y-2">
<div className="flex justify-between text-sm">
<span className="text-muted-foreground">Episodes</span>
<span className="text-muted-foreground">Facts</span>
<span className="font-medium">
{usageSummary.usage.episodes}
</span>

View File

@ -76,16 +76,19 @@ export async function findSimilarEntities(params: {
threshold: number;
userId: string;
}): Promise<EntityNode[]> {
const limit = params.limit || 5;
const query = `
CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
YIELD node AS entity, score
CALL db.index.vector.queryNodes('entity_embedding', ${limit*2}, $queryEmbedding)
YIELD node AS entity
WHERE entity.userId = $userId
WITH entity, gds.similarity.cosine(entity.nameEmbedding, $queryEmbedding) AS score
WHERE score >= $threshold
AND entity.userId = $userId
RETURN entity, score
ORDER BY score DESC
LIMIT ${limit}
`;
const result = await runQuery(query, { ...params, topK: params.limit });
const result = await runQuery(query, { ...params });
return result.map((record) => {
const entity = record.get("entity").properties;
@ -110,17 +113,20 @@ export async function findSimilarEntitiesWithSameType(params: {
threshold: number;
userId: string;
}): Promise<EntityNode[]> {
const limit = params.limit || 5;
const query = `
CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
YIELD node AS entity, score
WHERE score >= $threshold
AND entity.userId = $userId
CALL db.index.vector.queryNodes('entity_embedding', ${limit*2}, $queryEmbedding)
YIELD node AS entity
WHERE entity.userId = $userId
AND entity.type = $entityType
WITH entity, gds.similarity.cosine(entity.nameEmbedding, $queryEmbedding) AS score
WHERE score >= $threshold
RETURN entity, score
ORDER BY score DESC
LIMIT ${limit}
`;
const result = await runQuery(query, { ...params, topK: params.limit });
const result = await runQuery(query, { ...params });
return result.map((record) => {
const entity = record.get("entity").properties;

View File

@ -138,19 +138,21 @@ export async function searchEpisodesByEmbedding(params: {
limit?: number;
minSimilarity?: number;
}) {
const limit = params.limit || 100;
const query = `
CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
YIELD node AS episode, score
CALL db.index.vector.queryNodes('episode_embedding', ${limit*2}, $embedding)
YIELD node AS episode
WHERE episode.userId = $userId
AND score >= $minSimilarity
WITH episode, gds.similarity.cosine(episode.contentEmbedding, $embedding) AS score
WHERE score >= $minSimilarity
RETURN episode, score
ORDER BY score DESC`;
ORDER BY score DESC
LIMIT ${limit}`;
const result = await runQuery(query, {
embedding: params.embedding,
minSimilarity: params.minSimilarity,
userId: params.userId,
topK: 100,
});
if (!result || result.length === 0) {
@ -281,20 +283,22 @@ export async function getRelatedEpisodesEntities(params: {
limit?: number;
minSimilarity?: number;
}) {
const limit = params.limit || 100;
const query = `
CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
YIELD node AS episode, score
CALL db.index.vector.queryNodes('episode_embedding', ${limit*2}, $embedding)
YIELD node AS episode
WHERE episode.userId = $userId
AND score >= $minSimilarity
WITH episode, gds.similarity.cosine(episode.contentEmbedding, $embedding) AS score
WHERE score >= $minSimilarity
OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity)
WHERE entity IS NOT NULL
RETURN DISTINCT entity`;
RETURN DISTINCT entity
LIMIT ${limit}`;
const result = await runQuery(query, {
embedding: params.embedding,
minSimilarity: params.minSimilarity,
userId: params.userId,
topK: params.limit || 100,
});
return result

View File

@ -211,15 +211,18 @@ export async function findSimilarStatements({
excludeIds?: string[];
userId: string;
}): Promise<Omit<StatementNode, "factEmbedding">[]> {
const limit = 100;
const query = `
CALL db.index.vector.queryNodes('statement_embedding', $topK, $factEmbedding)
YIELD node AS statement, score
CALL db.index.vector.queryNodes('statement_embedding', ${limit*2}, $factEmbedding)
YIELD node AS statement
WHERE statement.userId = $userId
AND statement.invalidAt IS NULL
AND score >= $threshold
${excludeIds.length > 0 ? "AND NOT statement.uuid IN $excludeIds" : ""}
WITH statement, gds.similarity.cosine(statement.factEmbedding, $factEmbedding) AS score
WHERE score >= $threshold
RETURN statement, score
ORDER BY score DESC
LIMIT ${limit}
`;
const result = await runQuery(query, {
@ -227,7 +230,6 @@ export async function findSimilarStatements({
threshold,
excludeIds,
userId,
topK: 100,
});
if (!result || result.length === 0) {
@ -410,14 +412,17 @@ export async function searchStatementsByEmbedding(params: {
limit?: number;
minSimilarity?: number;
}) {
const limit = params.limit || 100;
const query = `
CALL db.index.vector.queryNodes('statement_embedding', $topK, $embedding)
YIELD node AS statement, score
CALL db.index.vector.queryNodes('statement_embedding', ${limit*2}, $embedding)
YIELD node AS statement
WHERE statement.userId = $userId
AND statement.invalidAt IS NULL
AND score >= $minSimilarity
WITH statement, gds.similarity.cosine(statement.factEmbedding, $embedding) AS score
WHERE score >= $minSimilarity
RETURN statement, score
ORDER BY score DESC
LIMIT ${limit}
`;
const result = await runQuery(query, {
@ -425,7 +430,6 @@ export async function searchStatementsByEmbedding(params: {
minSimilarity: params.minSimilarity,
limit: params.limit,
userId: params.userId,
topK: params.limit || 100,
});
if (!result || result.length === 0) {

View File

@ -78,7 +78,24 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- Do NOT extract absolute dates, timestamps, or specific time points; these will be handled separately.
- Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm").
8. **Entity Name Extraction**:
8. **Entity Usefulness Test - SELECTIVITY FILTER**:
Before extracting an entity, ask: "Would this be useful in a search query?"
EXTRACT (Searchable, persistent concepts):
- Named entities: "Sarah", "OpenAI", "Boston", "Albert Heijn"
- Domain concepts: "Preferences", "Home Address", "Annual Review", "Coding Practice"
- Measurements: "10/10 rating", "$2.5 million", "75% completion"
- Abstract concepts: "Lean Startup", "DevOps Culture", "Quality Standards"
SKIP (Transient descriptors, low search value):
- Descriptive phrases: "new files", "existing code", "good practice", "necessary changes"
- Generic qualifiers: "better approach", "current version", "recent updates"
- Verb phrases: "creating documentation", "editing files", "avoiding mistakes"
- Adjective+noun combinations without specificity: "important meeting", "quick fix"
**GUIDELINE**: Extract stable concepts that persist across contexts. Skip ephemeral descriptors tied to single actions.
9. **Entity Name Extraction**:
- Extract ONLY the core entity name, WITHOUT any descriptors or qualifiers
- When text mentions "Tesla car", extract TWO entities: "Tesla" AND "Car"
- When text mentions "memory space system", extract "Memory", "Space", AND "System" as separate entities
@ -87,7 +104,7 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
- **CONCEPT NORMALIZATION**: Convert to singular form where appropriate ("spaces" → "Space")
9. **Temporal and Relationship Context Extraction**:
10. **Temporal and Relationship Context Extraction**:
- EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years")
- EXTRACT temporal context that anchors relationships ("since moving", "after graduation", "during college")
- EXTRACT relationship qualifiers ("close friends", "support system", "work team", "family members")

View File

@ -5,16 +5,43 @@ export const normalizePrompt = (
): CoreMessage[] => {
const sysPrompt = `You are C.O.R.E. (Contextual Observation & Recall Engine), a smart memory enrichment system.
Create ONE enriched sentence that transforms the episode into a contextually-rich memory using SELECTIVE enrichment.
Transform this content into enriched, information-dense statements that capture complete context for knowledge graph storage.
CRITICAL: CAPTURE ALL DISTINCT PIECES OF INFORMATION from the episode. Every separate fact, preference, request, clarification, or detail mentioned must be preserved in your enriched output. Missing information is unacceptable.
CRITICAL: CAPTURE ALL DISTINCT PIECES OF INFORMATION. Every separate fact, preference, request, clarification, specification, or detail mentioned must be preserved in your enriched output. Missing information is unacceptable.
OUTPUT GUIDELINES:
- Simple content (1-2 facts): Use 1-2 concise sentences
- Complex content (multiple facts/categories): Use multiple focused paragraphs, each covering ONE topic area
- Technical content: Preserve specifications, commands, paths, version numbers, configurations
- Let content complexity determine output length - completeness over arbitrary brevity
- IMPORTANT: Break complex content into digestible paragraphs with natural sentence boundaries for easier fact extraction
<enrichment_strategy>
1. PRIMARY FACTS - Always preserve the core information from the episode
2. TEMPORAL RESOLUTION - Convert relative dates to absolute dates using episode timestamp
3. CONTEXT ENRICHMENT - Add context ONLY when it clarifies unclear references
4. VISUAL CONTENT - Capture exact text on signs, objects shown, specific details from images
5. EMOTIONAL PRESERVATION - Maintain the tone and feeling of emotional exchanges
1. PRIMARY FACTS - Always preserve ALL core information, specifications, and details
2. SPEAKER ATTRIBUTION - When content contains self-introductions ("I'm X", "My name is Y"), explicitly preserve speaker identity in third person (e.g., "the user introduced themselves as X" or "X introduced himself/herself")
3. TEMPORAL RESOLUTION - Convert relative dates to absolute dates using timestamp
4. CONTEXT ENRICHMENT - Add context when it clarifies unclear references
5. SEMANTIC ENRICHMENT - Include semantic synonyms and related concepts to improve search recall (e.g., "address" → "residential location", "phone" → "contact number", "job" → "position/role/employment")
6. ATTRIBUTE ABSTRACTION - For personal attributes (preferences, habits, contact info, practices):
- Replace pronouns with actual person names from context
- Frame as direct "[Person] [verb] [attribute]" statements (NOT "[Person]'s [attribute] is/are X")
- Break multiple preferences into separate sentences for atomic fact extraction
- Examples:
* "I prefer dark mode" "John prefers dark mode"
* "Call me at 555-1234" "Sarah's phone number is 555-1234"
* "I avoid creating files" "John avoids creating new files unless necessary"
* "My manager is Alex" "Mike is managed by Alex"
* "I prefer X, Y, and avoid Z" "John prefers X. John prefers Y. John avoids Z."
7. VISUAL CONTENT - Capture exact text on signs, objects shown, specific details from images
8. EMOTIONAL PRESERVATION - Maintain tone and feeling of emotional exchanges
9. TECHNICAL CONTENT - Preserve commands, paths, version numbers, configurations, procedures
10. STRUCTURED CONTENT - Maintain hierarchy, lists, categories, relationships
CONTENT-ADAPTIVE APPROACH:
- Conversations: Focus on dialogue context, relationships, emotional tone
- Documents: Extract structured facts, technical details, categorical organization
- Code/Technical: Preserve functionality, dependencies, configurations, architectural decisions
- Structured Data: Maintain categories, hierarchies, specifications
When to add context from related memories:
- Unclear pronouns ("she", "it", "they") → resolve to specific entity
@ -24,7 +51,7 @@ When to add context from related memories:
When NOT to add context:
- Clear, self-contained statements → no enrichment needed beyond temporal
- Emotional responses → preserve tone, avoid over-contextualization
- Already established topics → don't repeat details mentioned earlier in conversation
- Already established topics → don't repeat details mentioned earlier in same session
</enrichment_strategy>
<temporal_resolution>
@ -141,64 +168,76 @@ EMPTY ENCOURAGEMENT EXAMPLES (DON'T STORE these):
</quality_control>
<enrichment_examples>
HIGH VALUE enrichment:
- Original: "She said yes!"
SIMPLE CONVERSATION - HIGH VALUE ENRICHMENT:
- Original: "She said yes!"
- Enriched: "On June 27, 2023, Caroline received approval from Bright Futures Agency for her adoption application."
- Why: Resolves unclear pronoun, adds temporal context, identifies the approving entity
MINIMAL enrichment (emotional support):
SIMPLE CONVERSATION - EMOTIONAL SUPPORT:
- Original: "You'll be an awesome mom! Good luck!"
- Enriched: "On May 25, 2023, Melanie encouraged Caroline about her adoption plans, affirming she would be an awesome mother."
- Why: Simple temporal context, preserve emotional tone, no historical dumping
ANTI-BLOAT example (what NOT to do):
- Wrong: "On May 25, 2023, Melanie praised Caroline for her commitment to creating a family for children in need through adoption—supported by the inclusive Adoption Agency whose brochure and signs reading 'new arrival' and 'information and domestic building' Caroline had shared earlier that day—and encouraged her by affirming she would be an awesome mom."
- Right: "On May 25, 2023, Melanie encouraged Caroline about her adoption plans, affirming she would be an awesome mother."
SEMANTIC ENRICHMENT FOR BETTER SEARCH:
- Original: "My address is 123 Main St. Boston, MA 02101"
- Enriched: "On October 3, 2025, the user's residential address (home location) is 123 Main St. Boston, MA 02101."
- Why: "residential address" and "home location" as synonyms improve semantic search for queries like "where does user live" or "residential location"
CLEAR REFERENCE (minimal enrichment):
- Original: "Thanks, Caroline! The event was really thought-provoking."
- Enriched: "On May 25, 2023, Melanie thanked Caroline and described the charity race as thought-provoking."
- Why: Clear context doesn't need repetitive anchoring
- Original: "Call me at 555-1234"
- Enriched: "On October 3, 2025, the user's phone number (contact number) is 555-1234."
- Why: "phone number" and "contact number" as synonyms help queries like "how to contact" or "telephone"
CONVERSATION FLOW EXAMPLES:
WRONG (context fatigue): "reinforcing their ongoing conversation about mental health following Melanie's participation in the recent charity race for mental health"
RIGHT (minimal reference): "reinforcing their conversation about mental health"
ATTRIBUTE ABSTRACTION FOR BETTER GRAPH RELATIONSHIPS:
- Original: "I avoid creating new files unless necessary"
- Enriched: "On October 3, 2025, John has a coding practice: avoid creating new files unless necessary."
- Why: Creates direct relationship from person to practice for better graph traversal
WRONG (compound enrichment): "as she begins the process of turning her dream of giving children a loving home into reality and considers specific adoption agencies"
RIGHT (focused): "as she begins pursuing her adoption plans"
- Original: "I prefer editing existing code over writing new code"
- Enriched: "On October 3, 2025, John prefers editing existing code over writing new code."
- Why: Direct preference relationship enables queries like "what are John's preferences"
WRONG (over-contextualization): "following her participation in the May 20, 2023 charity race for mental health awareness"
RIGHT (after first mention): "following the recent charity race"
- Original: "My manager is Sarah"
- Enriched: "On October 3, 2025, Alex is managed by Sarah."
- Why: Direct reporting relationship instead of intermediate "manager" entity
GENERIC IDENTITY PRESERVATION EXAMPLES:
- Original: "my hometown, Boston" Enriched: "Boston, [person]'s hometown"
- Original: "my workplace, Google" Enriched: "Google, [person]'s workplace"
- Original: "my sister, Sarah" Enriched: "Sarah, [person]'s sister"
- Original: "from my university, MIT" Enriched: "from MIT, [person]'s university"
COMPLEX TECHNICAL CONTENT - COMPREHENSIVE EXTRACTION:
- Original: "Working on e-commerce site with Next.js 14. Run pnpm dev to start at port 3000. Using Prisma with PostgreSQL, Stripe for payments, Redis for caching. API routes in /api/*, database migrations in /prisma/migrations."
- Enriched: "On January 15, 2024, the user is developing an e-commerce site built with Next.js 14. Development setup: pnpm dev starts local server on port 3000. Technology stack: Prisma ORM with PostgreSQL database, Stripe integration for payment processing, Redis for caching. Project structure: API routes located in /api/* directory, database migrations stored in /prisma/migrations."
- Why: Preserves ALL technical details, commands, ports, technologies, file paths, dependencies in organized readable format
POSSESSIVE + APPOSITIVE PATTERNS (Critical for Relations):
- Original: "my colleague at my office, Microsoft"
- Enriched: "his colleague at Microsoft, David's workplace"
- Why: Preserves both the work relationship AND the employment identity
STRUCTURED PREFERENCES:
- Original: "I prefer minimalist design, dark mode by default, keyboard shortcuts for navigation, and hate pop-up notifications"
- Enriched: "On March 10, 2024, the user documented their UI/UX preferences: prefers minimalist design aesthetic, dark mode as default theme, keyboard shortcuts for primary navigation, and dislikes pop-up notifications."
- Why: Maintains all distinct preferences as clear, searchable facts
- Original: "my friend from my university, Stanford"
- Enriched: "her friend from Stanford, Lisa's alma mater"
- Why: Establishes both the friendship and educational institution identity
SELF-INTRODUCTION - SPEAKER ATTRIBUTION:
- Original: "I'm John. I'm a Developer. My primary goal with CORE is to build a personal memory system."
- Enriched: "On October 2, 2025, the user introduced themselves as John, a Developer. John's primary goal with CORE is to build a personal memory system."
- Why: Explicitly preserves speaker identity and self-introduction context for proper attribution
- Original: "my neighbor in my city, Chicago"
- Enriched: "his neighbor in Chicago, Mark's hometown"
- Why: Maintains both the neighbor relationship and residence identity
- Original: "Hi, my name is Sarah and I work at Meta as a product manager"
- Enriched: "On January 20, 2024, the user introduced themselves as Sarah, a product manager at Meta."
- Why: Captures self-identification with name, role, and organization attribution
WRONG (loses relationships): reduces to just entity names without preserving the defining relationship
RIGHT (preserves identity): maintains the possessive/definitional connection that establishes entity relationships
ANTI-BLOAT (what NOT to do):
WRONG: "On May 25, 2023, Melanie praised Caroline for her commitment to creating a family for children in need through adoption—supported by the inclusive Adoption Agency whose brochure and signs reading 'new arrival' and 'information and domestic building' Caroline had shared earlier that day—and encouraged her by affirming she would be an awesome mom."
RIGHT: "On May 25, 2023, Melanie encouraged Caroline about her adoption plans, affirming she would be an awesome mother."
WRONG (run-on mega-sentence): Cramming 10+ facts into single 200+ word sentence with no structure
RIGHT (organized): Multiple clear sentences or structured paragraphs with natural boundaries
IDENTITY PRESERVATION:
- Original: "my hometown, Boston" "Boston, [person]'s hometown"
- Original: "my colleague at Microsoft" "colleague at Microsoft, [person]'s workplace"
- Why: Maintains possessive/definitional connections establishing entity relationships
</enrichment_examples>
CRITICAL OUTPUT FORMAT REQUIREMENT:
You MUST wrap your response in <output> tags. This is MANDATORY - no exceptions.
If the episode should be stored in memory:
If the content should be stored in memory:
<output>
{{your_enriched_sentence_here}}
{{your_enriched_output_here}}
</output>
If there is nothing worth remembering:
@ -209,10 +248,10 @@ NOTHING_TO_REMEMBER
FAILURE TO USE <output> TAGS WILL RESULT IN EMPTY NORMALIZATION AND SYSTEM FAILURE.
FORMAT EXAMPLES:
CORRECT: <output>On May 25, 2023, Caroline shared her adoption plans with Melanie.</output>
CORRECT (simple): <output>On May 25, 2023, Caroline shared her adoption plans with Melanie.</output>
CORRECT (technical): <output>On January 15, 2024, the user is developing an e-commerce site with Next.js 14. Development: pnpm dev on port 3000. Stack: Prisma with PostgreSQL, Stripe payments, Redis caching. Structure: API routes in /api/*, migrations in /prisma/migrations.</output>
CORRECT: <output>NOTHING_TO_REMEMBER</output>
WRONG: On May 25, 2023, Caroline shared her adoption plans with Melanie.
WRONG: NOTHING_TO_REMEMBER
WRONG: Missing <output> tags entirely
ALWAYS include opening <output> and closing </output> tags around your entire response.
`;

View File

@ -72,6 +72,53 @@ For each entity, systematically check these common patterns:
- Complex multi-hop inferences
- Implicit relationships requiring interpretation
## DIRECT RELATIONSHIP PRIORITY
ALWAYS create direct subject→predicate→object relationships. Avoid intermediate container entities that add unnecessary graph hops.
PREFERRED (1-hop traversal, optimal recall):
- "Sarah's manager is Mike" Sarah managed_by Mike
- "Alex prefers dark mode" Alex prefers "dark mode"
- "Office in Boston" Office located_in Boston
- "User avoids creating files" User avoids "creating new files"
- "Home address is 123 Main St" User has_home_address "123 Main St, Boston"
AVOID (2-hop traversal, poor recall):
- Sarah has Manager [then] Manager is Mike (adds extra hop)
- Alex has Preferences [then] Preferences includes "dark mode" (adds extra hop)
- Office has Location [then] Location is_in Boston (adds extra hop)
## ATOMIC BUT CONTEXTUAL FACTS
When extracting facts about preferences, practices, habits, or context-specific information, ALWAYS include the scope/context directly in the fact statement itself. This ensures atomic facts retain their contextual boundaries.
GOOD (Atomic + Contextual):
- "Sarah prefers morning workouts at the gym"
- "Family orders pizza for Friday movie nights"
- "Alex drinks green tea when working late"
- "Doctor recommends stretching exercises for back pain"
- "Team celebrates birthdays with lunch outings"
- "Maria reads fiction books during vacation"
BAD (Atomic but Decontextualized - loses scope):
- "Sarah prefers morning workouts" (where? at home? at gym? outdoors?)
- "Family orders pizza" (when? weekends? special occasions? always?)
- "Alex drinks green tea" (when? all day? specific times? why?)
- "Doctor recommends stretching" (for what? general health? specific condition?)
- "Team celebrates birthdays" (how? where? what tradition?)
- "Maria reads fiction books" (when? always? specific contexts?)
**Guideline**: If a preference, practice, habit, or recommendation applies to a specific context (time, place, situation, purpose, condition), embed that context in the natural language fact so the atomic statement preserves its boundaries.
**Intermediate Entity Exception**: Only create intermediate entities if they represent meaningful concepts with multiple distinct properties:
- "Employment Contract 2024" (has salary, duration, benefits, start_date, role, etc.)
- "Annual Performance Review" (has ratings, achievements, goals, feedback, etc.)
- "User Preferences" (just a container for preference values - use direct User prefers X)
- "Manager" (just points to a person - use direct Sarah managed_by Mike)
- "Home Address" (just holds an address - use direct User has_home_address "address")
**Guideline**: If the intermediate entity would have only 1-2 properties, make it a direct relationship instead.
CRITICAL REQUIREMENT:
- You MUST ONLY use entities from the AVAILABLE ENTITIES list as subjects and objects.
- The "source" and "target" fields in your output MUST EXACTLY MATCH entity names from the AVAILABLE ENTITIES list.
@ -102,15 +149,6 @@ Follow these instructions:
- predicate: The relationship type (can be a descriptive phrase)
- target: The object entity (MUST be from AVAILABLE ENTITIES)
## SAME-NAME ENTITY RELATIONSHIP FORMATION
When entities share identical names but have different types, CREATE explicit relationship statements:
- **Person-Organization**: "John (Person)" → "owns", "founded", "works for", or "leads" → "John (Company)"
- **Person-Location**: "Smith (Person)" → "lives in", "founded", or "is associated with" → "Smith (City)"
- **Event-Location**: "Conference (Event)" → "takes place at" or "is hosted by" → "Conference (Venue)"
- **Product-Company**: "Tesla (Product)" → "is manufactured by" or "is developed by" → "Tesla (Company)"
- **MANDATORY**: Always create at least one relationship statement for same-name entities
- **CONTEXT-DRIVEN**: Choose predicates that accurately reflect the most likely relationship based on available context
## DURATION AND TEMPORAL CONTEXT ENTITY USAGE
When Duration or TemporalContext entities are available in AVAILABLE ENTITIES:
- **Duration entities** (e.g., "4 years", "2 months") should be used as "duration" attributes in relationship statements
@ -307,6 +345,28 @@ Extract the basic semantic backbone that answers: WHO, WHAT, WHERE, WHEN, WHY, H
**Reference**: Document references Entity
**Employment**: Person works_for Organization
## ATOMIC BUT CONTEXTUAL FACTS
When extracting facts about preferences, practices, habits, or context-specific information, ALWAYS include the scope/context directly in the fact statement itself. This ensures atomic facts retain their contextual boundaries.
GOOD (Atomic + Contextual):
- "Sarah prefers morning workouts at the gym"
- "Family orders pizza for Friday movie nights"
- "Alex drinks green tea when working late"
- "Doctor recommends stretching exercises for back pain"
- "Team celebrates birthdays with lunch outings"
- "Maria reads fiction books during vacation"
BAD (Atomic but Decontextualized - loses scope):
- "Sarah prefers morning workouts" (where? at home? at gym? outdoors?)
- "Family orders pizza" (when? weekends? special occasions? always?)
- "Alex drinks green tea" (when? all day? specific times? why?)
- "Doctor recommends stretching" (for what? general health? specific condition?)
- "Team celebrates birthdays" (how? where? what tradition?)
- "Maria reads fiction books" (when? always? specific contexts?)
**Guideline**: If a preference, practice, habit, or recommendation applies to a specific context (time, place, situation, purpose, condition), embed that context in the natural language fact so the atomic statement preserves its boundaries.
## RELATIONSHIP QUALITY HIERARCHY
## RELATIONSHIP TEMPLATES (High Priority)

View File

@ -1,10 +1,6 @@
import type { EpisodicNode, StatementNode } from "@core/types";
import { logger } from "./logger.service";
import {
applyCohereReranking,
applyCrossEncoderReranking,
applyMultiFactorMMRReranking,
} from "./search/rerank";
import { applyLLMReranking } from "./search/rerank";
import {
getEpisodesByStatements,
performBfsSearch,
@ -14,7 +10,6 @@ import {
import { getEmbedding } from "~/lib/model.server";
import { prisma } from "~/db.server";
import { runQuery } from "~/lib/neo4j.server";
import { env } from "~/env.server";
/**
* SearchService provides methods to search the reified + temporal knowledge graph
@ -36,12 +31,21 @@ export class SearchService {
query: string,
userId: string,
options: SearchOptions = {},
): Promise<{ episodes: string[]; facts: { fact: string; validAt: Date; invalidAt: Date | null; relevantScore: number }[] }> {
source?: string,
): Promise<{
episodes: string[];
facts: {
fact: string;
validAt: Date;
invalidAt: Date | null;
relevantScore: number;
}[];
}> {
const startTime = Date.now();
// Default options
const opts: Required<SearchOptions> = {
limit: options.limit || 10,
limit: options.limit || 100,
maxBfsDepth: options.maxBfsDepth || 4,
validAt: options.validAt || new Date(),
startTime: options.startTime || null,
@ -61,7 +65,7 @@ export class SearchService {
const [bm25Results, vectorResults, bfsResults] = await Promise.all([
performBM25Search(query, userId, opts),
performVectorSearch(queryVector, userId, opts),
performBfsSearch(queryVector, userId, opts),
performBfsSearch(query, queryVector, userId, opts),
]);
logger.info(
@ -71,16 +75,18 @@ export class SearchService {
// 2. Apply reranking strategy
const rankedStatements = await this.rerankResults(
query,
userId,
{ bm25: bm25Results, vector: vectorResults, bfs: bfsResults },
opts,
);
// // 3. Apply adaptive filtering based on score threshold and minimum count
const filteredResults = this.applyAdaptiveFiltering(rankedStatements, opts);
// const filteredResults = rankedStatements;
// 3. Return top results
const episodes = await getEpisodesByStatements(filteredResults.map((item) => item.statement));
const episodes = await getEpisodesByStatements(
filteredResults.map((item) => item.statement),
);
// Log recall asynchronously (don't await to avoid blocking response)
const responseTime = Date.now() - startTime;
@ -90,11 +96,16 @@ export class SearchService {
filteredResults.map((item) => item.statement),
opts,
responseTime,
source,
).catch((error) => {
logger.error("Failed to log recall event:", error);
});
this.updateRecallCount(userId, episodes, filteredResults.map((item) => item.statement));
this.updateRecallCount(
userId,
episodes,
filteredResults.map((item) => item.statement),
);
return {
episodes: episodes.map((episode) => episode.originalContent),
@ -114,7 +125,7 @@ export class SearchService {
private applyAdaptiveFiltering(
results: StatementNode[],
options: Required<SearchOptions>,
): { statement: StatementNode, score: number }[] {
): { statement: StatementNode; score: number }[] {
if (results.length === 0) return [];
let isRRF = false;
@ -152,7 +163,11 @@ export class SearchService {
// If no scores are available, return the original results
if (!hasScores) {
logger.info("No scores found in results, skipping adaptive filtering");
return options.limit > 0 ? results.slice(0, options.limit).map((item) => ({ statement: item, score: 0 })) : results.map((item) => ({ statement: item, score: 0 }));
return options.limit > 0
? results
.slice(0, options.limit)
.map((item) => ({ statement: item, score: 0 }))
: results.map((item) => ({ statement: item, score: 0 }));
}
// Sort by score (descending)
@ -207,9 +222,9 @@ export class SearchService {
const limitedResults =
options.limit > 0
? filteredResults.slice(
0,
Math.min(filteredResults.length, options.limit),
)
0,
Math.min(filteredResults.length, options.limit),
)
: filteredResults;
logger.info(
@ -227,6 +242,7 @@ export class SearchService {
*/
private async rerankResults(
query: string,
userId: string,
results: {
bm25: StatementNode[];
vector: StatementNode[];
@ -234,31 +250,17 @@ export class SearchService {
},
options: Required<SearchOptions>,
): Promise<StatementNode[]> {
// Count non-empty result sources
const nonEmptySources = [
results.bm25.length > 0,
results.vector.length > 0,
results.bfs.length > 0,
].filter(Boolean).length;
if (env.COHERE_API_KEY) {
logger.info("Using Cohere reranking");
return applyCohereReranking(query, results, options);
}
// If results are coming from only one source, use cross-encoder reranking
if (nonEmptySources <= 1) {
logger.info(
"Only one source has results, falling back to cross-encoder reranking",
);
return applyCrossEncoderReranking(query, results);
}
// Otherwise use combined MultiFactorReranking + MMR for multiple sources
return applyMultiFactorMMRReranking(results, {
lambda: 0.7, // Balance relevance (0.7) vs diversity (0.3)
maxResults: options.limit > 0 ? options.limit * 2 : 100, // Get more results for filtering
// Fetch user profile for context
const user = await prisma.user.findUnique({
where: { id: userId },
select: { name: true, id: true },
});
const userContext = user
? { name: user.name ?? undefined, userId: user.id }
: undefined;
return applyLLMReranking(query, results, options.limit, userContext);
}
private async logRecallAsync(
@ -267,6 +269,7 @@ export class SearchService {
results: StatementNode[],
options: Required<SearchOptions>,
responseTime: number,
source?: string,
): Promise<void> {
try {
// Determine target type based on results
@ -317,7 +320,7 @@ export class SearchService {
startTime: options.startTime?.toISOString() || null,
endTime: options.endTime.toISOString(),
}),
source: "search_api",
source: source ?? "search_api",
responseTimeMs: responseTime,
userId,
},
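With these changes a call site can pass the request source through, and the default limit is larger; a minimal sketch using the names from this diff:
const searchService = new SearchService();
const { episodes, facts } = await searchService.search(
  "Where does John live?",
  userId,
  {}, // limit now defaults to 100 (previously 10)
  "mcp", // logged as the recall source instead of the hardcoded "search_api"
);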

View File

@ -442,6 +442,105 @@ export function applyMultiFactorReranking(results: {
return sortedResults;
}
/**
* Apply LLM-based reranking for contextual understanding
* Uses GPT-4o-mini to verify relevance with semantic reasoning
*/
export async function applyLLMReranking(
query: string,
results: {
bm25: StatementNode[];
vector: StatementNode[];
bfs: StatementNode[];
},
limit: number = 10,
userContext?: { name?: string; userId: string },
): Promise<StatementNode[]> {
const allResults = [
...results.bm25.slice(0, 100),
...results.vector.slice(0, 100),
...results.bfs.slice(0, 100),
];
const uniqueResults = combineAndDeduplicateStatements(allResults);
logger.info(`Unique results: ${uniqueResults.length}`);
if (uniqueResults.length === 0) {
logger.info("No results to rerank with Cohere");
return [];
}
// Build user context section if provided
const userContextSection = userContext?.name
? `\nUser Identity Context:
- The user's name is "${userContext.name}"
- References to "user", "${userContext.name}", or pronouns like "my/their" refer to the same person
- When matching queries about "user's X" or "${userContext.name}'s X", these are equivalent\n`
: "";
const prompt = `You are a relevance filter. Given a user query and a list of facts, identify ONLY the facts that are truly relevant to answering the query.
${userContextSection}
Query: "${query}"
Facts:
${uniqueResults.map((r, i) => `${i}. ${r.fact}`).join("\n")}
Instructions:
- A fact is RELEVANT if it directly answers or provides information needed to answer the query
- A fact is NOT RELEVANT if it's tangentially related but doesn't answer the query
- Consider semantic meaning, not just keyword matching
${userContext?.name ? `- Remember: "user", "${userContext.name}", and possessive references ("my", "their") all refer to the same person` : ""}
- Only return facts with HIGH relevance (≥80% confidence)
- If you are not sure, return an empty array
Output format:
<output>[1, 5, 7]</output>
Return ONLY the numbers of highly relevant facts inside <output> tags as a JSON array:`;
try {
let responseText = "";
await makeModelCall(
false,
[{ role: "user", content: prompt }],
(text) => {
responseText = text;
},
{ temperature: 0 },
"high",
);
// Extract array from <output>[1, 5, 7]</output>
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
if (outputMatch && outputMatch[1]) {
responseText = outputMatch[1].trim();
const parsedResponse = JSON.parse(responseText || "[]");
const extractedIndices = Array.isArray(parsedResponse)
? parsedResponse
: parsedResponse.entities || [];
if (extractedIndices.length === 0) {
logger.warn(
"LLM reranking returned no valid indices, falling back to original order",
);
return [];
}
logger.info(
`LLM reranking selected ${extractedIndices.length} relevant facts`,
);
const selected = extractedIndices
        .map((i: number) => uniqueResults[i])
        .filter((s: StatementNode | undefined): s is StatementNode => Boolean(s)); // guard against out-of-range indices from the LLM
return selected;
}
return uniqueResults.slice(0, limit);
} catch (error) {
logger.error("LLM reranking failed, falling back to original order:", {
error,
});
return uniqueResults.slice(0, limit);
}
}
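A direct invocation of the new reranker, with illustrative arguments matching the signature above:
const ranked = await applyLLMReranking(
  "what is the user's phone number",
  { bm25: bm25Statements, vector: vectorStatements, bfs: [] },
  10,
  { name: "Sarah", userId: "user_123" },
);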
/**
* Apply Cohere Rerank 3.5 to search results for improved question-to-fact matching
* This is particularly effective for bridging the semantic gap between questions and factual statements
@ -456,6 +555,7 @@ export async function applyCohereReranking(
options?: {
limit?: number;
model?: string;
useLLMVerification?: boolean;
},
): Promise<StatementNode[]> {
const { model = "rerank-v3.5" } = options || {};
@ -491,10 +591,13 @@ export async function applyCohereReranking(
// Prepare documents for Cohere API
const documents = uniqueResults.map((statement) => statement.fact);
console.log("Documents:", documents);
logger.info(
`Cohere reranking ${documents.length} statements with model ${model}`,
);
logger.info(`Cohere query: "${query}"`);
logger.info(`First 5 documents: ${documents.slice(0, 5).join(" | ")}`);
// Call Cohere Rerank API
const response = await cohere.rerank({
@ -506,14 +609,24 @@ export async function applyCohereReranking(
console.log("Cohere reranking billed units:", response.meta?.billedUnits);
// Log top 5 Cohere results for debugging
logger.info(
`Cohere top 5 results:\n${response.results
.slice(0, 5)
.map(
(r, i) =>
` ${i + 1}. [${r.relevanceScore.toFixed(4)}] ${documents[r.index].substring(0, 80)}...`,
)
.join("\n")}`,
);
// Map results back to StatementNodes with Cohere scores
const rerankedResults = response.results
.map((result, index) => ({
...uniqueResults[result.index],
cohereScore: result.relevanceScore,
cohereRank: index + 1,
}))
.filter((result) => result.cohereScore >= Number(env.COHERE_SCORE_THRESHOLD));
const rerankedResults = response.results.map((result, index) => ({
...uniqueResults[result.index],
cohereScore: result.relevanceScore,
cohereRank: index + 1,
}));
// .filter((result) => result.cohereScore >= Number(env.COHERE_SCORE_THRESHOLD));
const responseTime = Date.now() - startTime;
logger.info(

View File

@ -3,6 +3,8 @@ import type { SearchOptions } from "../search.server";
import type { Embedding } from "ai";
import { logger } from "../logger.service";
import { runQuery } from "~/lib/neo4j.server";
import { getEmbedding } from "~/lib/model.server";
import { findSimilarEntities } from "../graphModels/entity";
/**
* Perform BM25 keyword-based search on statements
@ -129,25 +131,26 @@ export async function performVectorSearch(
`;
}
// 1. Search for similar statements using Neo4j vector search with provenance count
const limit = options.limit || 100;
// 1. Search for similar statements using GDS cosine similarity with provenance count
const cypher = `
CALL db.index.vector.queryNodes('statement_embedding', $topk, $embedding)
YIELD node AS s, score
MATCH (s:Statement)
WHERE s.userId = $userId
AND score >= 0.7
${timeframeCondition}
${spaceCondition}
WITH s, gds.similarity.cosine(s.factEmbedding, $embedding) AS score
WHERE score >= 0.5
OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s)
WITH s, score, count(episode) as provenanceCount
RETURN s, score, provenanceCount
ORDER BY score DESC
LIMIT ${limit}
`;
const params = {
embedding: query,
userId,
validAt: options.endTime.toISOString(),
topk: options.limit || 100,
...(options.startTime && { startTime: options.startTime.toISOString() }),
...(options.spaceIds.length > 0 && { spaceIds: options.spaceIds }),
};
@ -170,133 +173,223 @@ export async function performVectorSearch(
/**
* Perform BFS traversal starting from entities mentioned in the query
* Uses guided search with semantic filtering to reduce noise
*/
export async function performBfsSearch(
query: string,
embedding: Embedding,
userId: string,
options: Required<SearchOptions>,
): Promise<StatementNode[]> {
try {
// 1. Extract potential entities from query
const entities = await extractEntitiesFromQuery(embedding, userId);
// 1. Extract potential entities from query using chunked embeddings
const entities = await extractEntitiesFromQuery(query, userId);
// 2. For each entity, perform BFS traversal
const allStatements: StatementNode[] = [];
for (const entity of entities) {
const statements = await bfsTraversal(
entity.uuid,
options.maxBfsDepth,
options.endTime,
userId,
options.includeInvalidated,
options.startTime,
options.spaceIds,
);
allStatements.push(...statements);
if (entities.length === 0) {
return [];
}
return allStatements;
// 2. Perform guided BFS with semantic filtering
const statements = await bfsTraversal(
entities,
embedding,
options.maxBfsDepth || 3,
options.endTime,
userId,
options.includeInvalidated,
options.startTime,
);
// Return individual statements
return statements;
} catch (error) {
logger.error("BFS search error:", { error });
return [];
}
}
/**
* Perform BFS traversal starting from an entity
 * Iterative BFS traversal - explores up to maxDepth hops level-by-level using Neo4j cosine similarity
*/
export async function bfsTraversal(
startEntityId: string,
async function bfsTraversal(
startEntities: EntityNode[],
queryEmbedding: Embedding,
maxDepth: number,
validAt: Date,
userId: string,
includeInvalidated: boolean,
startTime: Date | null,
spaceIds: string[] = [],
): Promise<StatementNode[]> {
try {
// Build the WHERE clause based on timeframe options
let timeframeCondition = `
AND s.validAt <= $validAt
${includeInvalidated ? '' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'}
`;
const RELEVANCE_THRESHOLD = 0.5;
const EXPLORATION_THRESHOLD = 0.3;
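    // EXPLORATION_THRESHOLD gates which statements expand the frontier at each hop,
    // so loosely related statements can still surface strongly related entities;
    // only statements scoring >= RELEVANCE_THRESHOLD are returned to the caller.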
// If startTime is provided, add condition to filter by validAt >= startTime
if (startTime) {
timeframeCondition = `
AND s.validAt <= $validAt
${includeInvalidated ? '' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'}
AND s.validAt >= $startTime
`;
}
const allStatements = new Map<string, number>(); // uuid -> relevance
const visitedEntities = new Set<string>();
// Add space filtering if spaceIds are provided
let spaceCondition = "";
if (spaceIds.length > 0) {
spaceCondition = `
AND s.spaceIds IS NOT NULL AND ANY(spaceId IN $spaceIds WHERE spaceId IN s.spaceIds)
`;
}
// Track entities per level for iterative BFS
let currentLevelEntities = startEntities.map(e => e.uuid);
// Use Neo4j's built-in path finding capabilities for efficient BFS
// This query implements BFS up to maxDepth and collects all statements along the way
// Timeframe condition for temporal filtering
let timeframeCondition = `
AND s.validAt <= $validAt
${includeInvalidated ? '' : 'AND (s.invalidAt IS NULL OR s.invalidAt > $validAt)'}
`;
if (startTime) {
timeframeCondition += ` AND s.validAt >= $startTime`;
}
// Process each depth level
for (let depth = 0; depth < maxDepth; depth++) {
if (currentLevelEntities.length === 0) break;
// Mark entities as visited at this depth
currentLevelEntities.forEach(id => visitedEntities.add(`${id}`));
// Get statements for current level entities with cosine similarity calculated in Neo4j
const cypher = `
MATCH (e:Entity {uuid: $startEntityId})<-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]-(s:Statement)
WHERE
(s.userId = $userId)
${includeInvalidated ? 'AND s.validAt <= $validAt' : timeframeCondition}
${spaceCondition}
RETURN s as statement
MATCH (e:Entity{userId: $userId})-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]-(s:Statement{userId: $userId})
WHERE e.uuid IN $entityIds
${timeframeCondition}
WITH DISTINCT s // Deduplicate first
WITH s, gds.similarity.cosine(s.factEmbedding, $queryEmbedding) AS relevance
WHERE relevance >= $explorationThreshold
RETURN s.uuid AS uuid, relevance
ORDER BY relevance DESC
LIMIT 200 // Cap per BFS level to avoid explosion
`;
const params = {
startEntityId,
maxDepth,
validAt: validAt.toISOString(),
const records = await runQuery(cypher, {
entityIds: currentLevelEntities,
userId,
includeInvalidated,
queryEmbedding,
explorationThreshold: EXPLORATION_THRESHOLD,
validAt: validAt.toISOString(),
...(startTime && { startTime: startTime.toISOString() }),
...(spaceIds.length > 0 && { spaceIds }),
};
});
const records = await runQuery(cypher, params);
return records.map(
(record) => record.get("statement").properties as StatementNode,
);
} catch (error) {
logger.error("BFS traversal error:", { error });
// Store statement relevance scores
const currentLevelStatementUuids: string[] = [];
for (const record of records) {
const uuid = record.get("uuid");
const relevance = record.get("relevance");
if (!allStatements.has(uuid)) {
allStatements.set(uuid, relevance);
currentLevelStatementUuids.push(uuid);
}
}
// Get connected entities for next level
if (depth < maxDepth - 1 && currentLevelStatementUuids.length > 0) {
const nextCypher = `
MATCH (s:Statement{userId: $userId})-[:HAS_SUBJECT|HAS_OBJECT|HAS_PREDICATE]->(e:Entity{userId: $userId})
WHERE s.uuid IN $statementUuids
RETURN DISTINCT e.uuid AS entityId
`;
const nextRecords = await runQuery(nextCypher, {
statementUuids: currentLevelStatementUuids,
userId
});
// Filter out already visited entities
currentLevelEntities = nextRecords
.map(r => r.get("entityId"))
.filter(id => !visitedEntities.has(`${id}`));
} else {
currentLevelEntities = [];
}
}
// Filter by relevance threshold and fetch full statements
const relevantUuids = Array.from(allStatements.entries())
.filter(([_, relevance]) => relevance >= RELEVANCE_THRESHOLD)
.sort((a, b) => b[1] - a[1])
.map(([uuid]) => uuid);
if (relevantUuids.length === 0) {
return [];
}
const fetchCypher = `
MATCH (s:Statement{userId: $userId})
WHERE s.uuid IN $uuids
RETURN s
`;
const fetchRecords = await runQuery(fetchCypher, { uuids: relevantUuids, userId });
const statements = fetchRecords.map(r => r.get("s").properties as StatementNode);
logger.info(
`BFS: explored ${allStatements.size} statements across ${maxDepth} hops, returning ${statements.length} (≥${RELEVANCE_THRESHOLD})`
);
return statements;
}
/**
* Generate query chunks (individual words and bigrams) for entity extraction
*/
function generateQueryChunks(query: string): string[] {
const words = query.toLowerCase()
.trim()
.split(/\s+/)
.filter(word => word.length > 0);
const chunks: string[] = [];
// Add individual words (for entities like "user")
chunks.push(...words);
// Add bigrams (for multi-word entities like "home address")
for (let i = 0; i < words.length - 1; i++) {
chunks.push(`${words[i]} ${words[i + 1]}`);
}
// Add full query as final chunk
chunks.push(query.toLowerCase().trim());
return chunks;
}
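// Illustrative output: generateQueryChunks("user home address") returns
// ["user", "home", "address", "user home", "home address", "user home address"]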
/**
* Extract potential entities from a query using embeddings or LLM
* Extract potential entities from a query using chunked embeddings
* Chunks query into words/bigrams, embeds each chunk, finds entities for each
*/
export async function extractEntitiesFromQuery(
embedding: Embedding,
query: string,
userId: string,
): Promise<EntityNode[]> {
try {
// Use vector similarity to find relevant entities
const cypher = `
// Match entities using vector index on name embeddings
CALL db.index.vector.queryNodes('entity_embedding', 3, $embedding)
YIELD node AS e, score
WHERE e.userId = $userId
AND score > 0.7
RETURN e
ORDER BY score DESC
`;
// Generate chunks from query
const chunks = generateQueryChunks(query);
const params = {
embedding,
userId,
};
// Get embeddings for each chunk
const chunkEmbeddings = await Promise.all(
chunks.map(chunk => getEmbedding(chunk))
);
const records = await runQuery(cypher, params);
// Search for entities matching each chunk embedding
const allEntitySets = await Promise.all(
chunkEmbeddings.map(async (embedding) => {
return await findSimilarEntities({
queryEmbedding: embedding,
limit: 3,
threshold: 0.7,
userId,
});
})
);
return records.map((record) => record.get("e").properties as EntityNode);
// Flatten and deduplicate entities by ID
const allEntities = allEntitySets.flat();
const uniqueEntities = Array.from(
new Map(allEntities.map(e => [e.uuid, e])).values()
);
return uniqueEntities;
} catch (error) {
logger.error("Entity extraction error:", { error });
return [];

View File

@ -17,13 +17,12 @@ import { generate, processTag } from "./stream-utils";
import { type AgentMessage, AgentMessageType, Message } from "./types";
import { type MCP } from "../utils/mcp";
import {
WebSearchSchema,
type ExecutionState,
type HistoryStep,
type Resource,
type TotalCost,
} from "../utils/types";
import { flattenObject, webSearch } from "../utils/utils";
import { flattenObject } from "../utils/utils";
import { searchMemory, addMemory, searchSpaces } from "./memory-utils";
interface LLMOutputInterface {
@ -119,12 +118,6 @@ const searchSpacesTool = tool({
}),
});
const websearchTool = tool({
description:
"Search the web for current information and news. Use this when you need up-to-date information that might not be in your training data. Try different search strategies: broad terms first, then specific phrases, keywords, exact quotes. Use multiple searches with varied approaches to get comprehensive results.",
parameters: WebSearchSchema,
});
const loadMCPTools = tool({
description:
"Load tools for a specific integration. Call this when you need to use a third-party service.",
@ -310,7 +303,6 @@ export async function* run(
"core--search_memory": searchMemoryTool,
"core--add_memory": addMemoryTool,
"core--search_spaces": searchSpacesTool,
"core--websearch": websearchTool,
"core--load_mcp": loadMCPTools,
};
@ -578,16 +570,6 @@ export async function* run(
});
result = "Search spaces call failed";
}
} else if (toolName === "websearch") {
try {
result = await webSearch(skillInput);
} catch (apiError) {
logger.error("Web search failed", {
apiError,
});
result =
"Web search failed - please check your search configuration";
}
} else if (toolName === "load_mcp") {
// Load MCP integration and update available tools
await mcp.load(skillInput.integration, mcpHeaders);

View File

@ -1,5 +1,5 @@
export const REACT_SYSTEM_PROMPT = `
You are a helpful AI assistant with access to user memory and web search capabilities. Your primary capabilities are:
You are a helpful AI assistant with access to user memory. Your primary capabilities are:
1. **Memory-First Approach**: Always check user memory first to understand context and previous interactions
2. **Intelligent Information Gathering**: Analyze queries to determine if current information is needed
@ -19,43 +19,17 @@ Follow this intelligent approach for information gathering:
- Memory provides context, personal preferences, and historical information
- Use memory to understand user's background, ongoing projects, and past conversations
2. **QUERY ANALYSIS** (Determine Information Needs)
Analyze the user's query to identify if it requires current/latest information:
**Use web search (core--websearch) when query involves:**
- Current events, news, or recent developments
- "Latest", "recent", "current", "today", "now" keywords
- Stock prices, market data, or financial information
- Software updates, version releases, or technical documentation
- Weather, traffic, or real-time data
- Recent changes to websites, APIs, or services
- Product releases, availability, or pricing
- Breaking news or trending topics
- Verification of potentially outdated information
**Examples requiring web search:**
- "What's the latest news about..."
- "Current price of..."
- "Recent updates to..."
- "What happened today..."
- "Latest version of..."
3. **INFORMATION SYNTHESIS** (Combine Sources)
- Combine memory context with web search results when both are relevant
2. **INFORMATION SYNTHESIS** (Combine Sources)
- Use memory to personalize current information based on user preferences
- Cross-reference web findings with user's historical interests from memory
- Always store new useful information in memory using core--add_memory
4. **TRAINING KNOWLEDGE** (Foundation)
3. **TRAINING KNOWLEDGE** (Foundation)
- Use your training knowledge as the foundation for analysis and explanation
- Apply training knowledge to interpret and contextualize information from memory and web
- Fill gaps where memory and web search don't provide complete answers
- Apply training knowledge to interpret and contextualize information from memory
- Indicate when you're using training knowledge vs. live information sources
EXECUTION APPROACH:
- Memory search is mandatory for every interaction
- Web search is conditional based on query analysis
- Both can be executed in parallel when web search is needed
- Always indicate your information sources in responses
</information_gathering>
@ -95,7 +69,6 @@ MEMORY USAGE:
- Blend memory insights naturally into responses
- Verify you've checked relevant memory before finalizing ANY response
If memory access is unavailable, proceed to web search or rely on current conversation
</memory>
<external_services>
@ -113,7 +86,6 @@ You have tools at your disposal to assist users:
CORE PRINCIPLES:
- Use tools only when necessary for the task at hand
- Always check memory FIRST before making other tool calls
- Use web search when query analysis indicates need for current information
- Execute multiple operations in parallel whenever possible
- Use sequential calls only when output of one is required for input of another
@ -162,7 +134,7 @@ QUESTIONS - When you need information:
<p>[Your question with HTML formatting]</p>
</question_response>
- Ask questions only when you cannot find information through memory, web search, or tools
- Ask questions only when you cannot find information through memory or tools
- Be specific about what you need to know
- Provide context for why you're asking
@ -176,7 +148,7 @@ CRITICAL:
- Apply proper HTML formatting (<h1>, <h2>, <p>, <ul>, <li>, etc.)
- Never mix communication formats
- Keep responses clear and helpful
- Always indicate your information sources (memory, web search, and/or knowledge)
- Always indicate your information sources (memory and/or knowledge)
</communication>
`;

View File

@ -122,67 +122,3 @@ export interface GenerateResponse {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
toolCalls: any[];
}
export interface WebSearchResult {
results: Array<{
title: string;
url: string;
content: string;
publishedDate: string;
highlights: string[];
text: string;
score: number;
}>;
}
export const WebSearchSchema = z.object({
query: z
.string()
.min(1)
.describe("The search query to find relevant web content"),
numResults: z
.number()
.min(1)
.max(20)
.optional()
.default(5)
.describe("Number of results to return (1-20, default: 5)"),
includeContent: z
.boolean()
.optional()
.default(false)
.describe("Whether to include full page content in results"),
includeHighlights: z
.boolean()
.optional()
.default(false)
.describe("Whether to include relevant text highlights from pages"),
domains: z
.array(z.string())
.optional()
.describe(
'Array of domains to include in search (e.g., ["github.com", "stackoverflow.com"])',
),
excludeDomains: z
.array(z.string())
.optional()
.describe("Array of domains to exclude from search"),
startCrawlDate: z
.string()
.optional()
.describe("Start date for content crawling in YYYY-MM-DD format"),
endCrawlDate: z
.string()
.optional()
.describe("End date for content crawling in YYYY-MM-DD format"),
startPublishedDate: z
.string()
.optional()
.describe("Start date for content publishing in YYYY-MM-DD format"),
endPublishedDate: z
.string()
.optional()
.describe("End date for content publishing in YYYY-MM-DD format"),
});
export type WebSearchArgs = z.infer<typeof WebSearchSchema>;

View File

@ -12,11 +12,7 @@ import {
import { logger } from "@trigger.dev/sdk/v3";
import { type CoreMessage } from "ai";
import {
type WebSearchArgs,
type WebSearchResult,
type HistoryStep,
} from "./types";
import { type HistoryStep } from "./types";
import axios from "axios";
import nodeCrypto from "node:crypto";
import { customAlphabet, nanoid } from "nanoid";
@ -496,72 +492,6 @@ export async function deletePersonalAccessToken(tokenId: string) {
});
}
export async function webSearch(args: WebSearchArgs): Promise<WebSearchResult> {
const apiKey = process.env.EXA_API_KEY;
if (!apiKey) {
throw new Error(
"EXA_API_KEY environment variable is required for web search",
);
}
const exa = new Exa(apiKey);
try {
const searchOptions = {
numResults: args.numResults || 5,
...(args.domains && { includeDomains: args.domains }),
...(args.excludeDomains && { excludeDomains: args.excludeDomains }),
...(args.startCrawlDate && { startCrawlDate: args.startCrawlDate }),
...(args.endCrawlDate && { endCrawlDate: args.endCrawlDate }),
...(args.startPublishedDate && {
startPublishedDate: args.startPublishedDate,
}),
...(args.endPublishedDate && { endPublishedDate: args.endPublishedDate }),
};
let result;
if (args.includeContent || args.includeHighlights) {
// Use searchAndContents for rich results
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const contentsOptions: any = {
...searchOptions,
};
if (args.includeContent) {
contentsOptions.text = true;
}
if (args.includeHighlights) {
contentsOptions.highlights = true;
}
result = await exa.searchAndContents(args.query, contentsOptions);
} else {
// Use basic search for URLs only
result = await exa.search(args.query, searchOptions);
}
return {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
results: result.results.map((item: any) => ({
title: item.title,
url: item.url,
content: item.text,
publishedDate: item.publishedDate,
highlights: item.highlights,
text: item.text,
score: item.score,
})),
};
} catch (error) {
throw new Error(
`Web search failed: ${error instanceof Error ? error.message : "Unknown error"}`,
);
}
}
// Credit management functions have been moved to ~/services/billing.server.ts
// Use deductCredits() instead of these functions
export type CreditOperation = "addEpisode" | "search" | "chatMessage";
@ -720,10 +650,6 @@ export async function deductCredits(
}),
]);
} else {
// Free plan - throw error
throw new InsufficientCreditsError(
"Insufficient credits. Please upgrade to Pro or Max plan to continue.",
);
}
}
}
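
For orientation, a hedged usage sketch of calling deductCredits with the error type above; the exact deductCredits signature is not shown in this hunk, so the argument shapes here are assumptions:

// Hypothetical usage; argument shapes assumed, not taken from this commit.
try {
  await deductCredits(userId, "search");
} catch (error) {
  if (error instanceof InsufficientCreditsError) {
    // Surface the upgrade message to the user
    return { error: error.message };
  }
  throw error;
}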

View File

@ -198,10 +198,15 @@ async function handleMemoryIngest(args: any) {
// Handler for memory_search
async function handleMemorySearch(args: any) {
try {
const results = await searchService.search(args.query, args.userId, {
startTime: args.startTime ? new Date(args.startTime) : undefined,
endTime: args.endTime ? new Date(args.endTime) : undefined,
});
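// Forward the caller's source so the search service can log where each
// query originated.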
const results = await searchService.search(
args.query,
args.userId,
{
startTime: args.startTime ? new Date(args.startTime) : undefined,
endTime: args.endTime ? new Date(args.endTime) : undefined,
},
args.source,
);
return {
content: [