Feat: AWS Bedrock support (#78)

* Feat: add support for AWS Bedrock

* Feat: add token counter
Feat: select high- or low-complexity model based on task

* feat: add model complexity selection for batch processing tasks
Manoj 2025-10-01 11:45:35 +05:30 committed by GitHub
parent 7903dd08c3
commit f539ad1ecd
14 changed files with 1844 additions and 321 deletions

View File

@ -51,6 +51,11 @@ OLLAMA_URL=http://ollama:11434
EMBEDDING_MODEL=text-embedding-3-small
MODEL=gpt-4.1-2025-04-14
## AWS Bedrock ##
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_REGION=us-east-1
## Trigger ##
TRIGGER_PROJECT_ID=
TRIGGER_SECRET_KEY=
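For illustration only: a Bedrock-backed deployment would also point MODEL at one of the Bedrock model ids handled in model.server. The values below are placeholders, and the two key variables may be left empty when credentials come from the default AWS credential chain (for example, an IAM role):

MODEL=us.amazon.nova-premier-v1:0
AWS_REGION=us-east-1
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=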

View File

@ -90,6 +90,10 @@ const EnvironmentSchema = z.object({
OLLAMA_URL: z.string().optional(),
COHERE_API_KEY: z.string().optional(),
COHERE_SCORE_THRESHOLD: z.string().default("0.3"),
AWS_ACCESS_KEY_ID: z.string().optional(),
AWS_SECRET_ACCESS_KEY: z.string().optional(),
AWS_REGION: z.string().optional(),
});
export type Environment = z.infer<typeof EnvironmentSchema>;

View File

@ -11,6 +11,7 @@ import {
type BatchResponse,
} from "../types";
import { logger } from "~/services/logger.service";
import { getModelForTask } from "~/lib/model.server";
export class OpenAIBatchProvider extends BaseBatchProvider {
providerName = "openai";
@ -40,13 +41,14 @@ export class OpenAIBatchProvider extends BaseBatchProvider {
try {
this.validateRequests(params.requests);
const model = getModelForTask(params.modelComplexity || 'high');
// Convert requests to OpenAI batch format
const batchRequests = params.requests.map((request, index) => ({
custom_id: request.customId,
method: "POST" as const,
url: "/v1/chat/completions",
body: {
model: process.env.MODEL as string,
model,
messages: request.systemPrompt
? [
{ role: "system" as const, content: request.systemPrompt },

View File

@ -3,6 +3,7 @@ import { z } from "zod";
export type BatchStatus = "pending" | "processing" | "completed" | "failed" | "cancelled";
export type ModelComplexity = 'high' | 'low';
export interface BatchRequest {
customId: string;
messages: CoreMessage[];
@ -39,6 +40,7 @@ export interface CreateBatchParams<T = any> {
outputSchema?: z.ZodSchema<T>;
maxRetries?: number;
timeoutMs?: number;
modelComplexity?: ModelComplexity;
}
export interface GetBatchParams {

View File

@ -11,15 +11,62 @@ import { logger } from "~/services/logger.service";
import { createOllama, type OllamaProvider } from "ollama-ai-provider";
import { anthropic } from "@ai-sdk/anthropic";
import { google } from "@ai-sdk/google";
import { createAmazonBedrock } from "@ai-sdk/amazon-bedrock";
import { fromNodeProviderChain } from "@aws-sdk/credential-providers";
export type ModelComplexity = 'high' | 'low';
/**
* Get the appropriate model for a given complexity level.
* HIGH complexity uses the configured MODEL.
* LOW complexity automatically downgrades to cheaper variants if possible.
*/
export function getModelForTask(complexity: ModelComplexity = 'high'): string {
const baseModel = process.env.MODEL || 'gpt-4.1-2025-04-14';
// HIGH complexity - always use the configured model
if (complexity === 'high') {
return baseModel;
}
// LOW complexity - automatically downgrade expensive models to cheaper variants
// If already using a cheap model, keep it
const downgrades: Record<string, string> = {
// OpenAI downgrades
'gpt-5-2025-08-07': 'gpt-5-mini-2025-08-07',
'gpt-4.1-2025-04-14': 'gpt-4.1-mini-2025-04-14',
// Anthropic downgrades
'claude-sonnet-4-5': 'claude-3-5-haiku-20241022',
'claude-3-7-sonnet-20250219': 'claude-3-5-haiku-20241022',
'claude-3-opus-20240229': 'claude-3-5-haiku-20241022',
// Google downgrades
'gemini-2.5-pro-preview-03-25': 'gemini-2.5-flash-preview-04-17',
'gemini-2.0-flash': 'gemini-2.0-flash-lite',
// AWS Bedrock downgrades (keep same model - already cost-optimized)
'us.amazon.nova-premier-v1:0': 'us.amazon.nova-premier-v1:0',
};
return downgrades[baseModel] || baseModel;
}
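A minimal usage sketch of the new helper, assuming MODEL is set to one of the keys in the downgrade map:

// With MODEL=gpt-4.1-2025-04-14
getModelForTask('high'); // "gpt-4.1-2025-04-14" (configured model, unchanged)
getModelForTask('low');  // "gpt-4.1-mini-2025-04-14" (cheaper variant)
// Models not in the map, or already-cheap models, are returned as-is for both complexities.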
export interface TokenUsage {
promptTokens: number;
completionTokens: number;
totalTokens: number;
}
export async function makeModelCall(
stream: boolean,
messages: CoreMessage[],
onFinish: (text: string, model: string) => void,
onFinish: (text: string, model: string, usage?: TokenUsage) => void,
options?: any,
complexity: ModelComplexity = 'high',
) {
let modelInstance;
const model = process.env.MODEL as any;
let modelInstance: LanguageModelV1 | undefined;
let model = getModelForTask(complexity);
const ollamaUrl = process.env.OLLAMA_URL;
let ollama: OllamaProvider | undefined;
@ -29,6 +76,15 @@ export async function makeModelCall(
});
}
const bedrock = createAmazonBedrock({
region: process.env.AWS_REGION || 'us-east-1',
credentialProvider: fromNodeProviderChain(),
});
const generateTextOptions: any = {}
console.log('complexity:', complexity, 'model:', model)
switch (model) {
case "gpt-4.1-2025-04-14":
case "gpt-4.1-mini-2025-04-14":
@ -36,6 +92,7 @@ export async function makeModelCall(
case "gpt-5-2025-08-07":
case "gpt-4.1-nano-2025-04-14":
modelInstance = openai(model, { ...options });
generateTextOptions.temperature = 1
break;
case "claude-3-7-sonnet-20250219":
@ -51,6 +108,16 @@ export async function makeModelCall(
modelInstance = google(model, { ...options });
break;
case "us.meta.llama3-3-70b-instruct-v1:0":
case "us.deepseek.r1-v1:0":
case "qwen.qwen3-32b-v1:0":
case "openai.gpt-oss-120b-1:0":
case "us.mistral.pixtral-large-2502-v1:0":
case "us.amazon.nova-premier-v1:0":
modelInstance = bedrock(`${model}`);
generateTextOptions.maxTokens = 100000
break;
default:
if (ollama) {
modelInstance = ollama(model);
@ -59,26 +126,71 @@ export async function makeModelCall(
break;
}
if (!modelInstance) {
throw new Error(`Unsupported model type: ${model}`);
}
if (stream) {
return streamText({
model: modelInstance as LanguageModelV1,
model: modelInstance,
messages,
onFinish: async ({ text }) => {
onFinish(text, model);
...generateTextOptions,
onFinish: async ({ text, usage }) => {
const tokenUsage = usage ? {
promptTokens: usage.promptTokens,
completionTokens: usage.completionTokens,
totalTokens: usage.totalTokens,
} : undefined;
if (tokenUsage) {
logger.log(`[${complexity.toUpperCase()}] ${model} - Tokens: ${tokenUsage.totalTokens} (prompt: ${tokenUsage.promptTokens}, completion: ${tokenUsage.completionTokens})`);
}
onFinish(text, model, tokenUsage);
},
});
}
const { text } = await generateText({
model: modelInstance as LanguageModelV1,
const { text, usage } = await generateText({
model: modelInstance,
messages,
...generateTextOptions,
});
onFinish(text, model);
const tokenUsage = usage ? {
promptTokens: usage.promptTokens,
completionTokens: usage.completionTokens,
totalTokens: usage.totalTokens,
} : undefined;
if (tokenUsage) {
logger.log(`[${complexity.toUpperCase()}] ${model} - Tokens: ${tokenUsage.totalTokens} (prompt: ${tokenUsage.promptTokens}, completion: ${tokenUsage.completionTokens})`);
}
onFinish(text, model, tokenUsage);
return text;
}
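A hedged call-site sketch of the extended signature: the callback now also receives the resolved model and optional token usage, and the fifth argument selects the complexity that getModelForTask resolves to a concrete model. responseText and totalTokens are assumed local variables, not part of this commit:

let responseText = "";
let totalTokens = 0;
await makeModelCall(
  false,                      // stream
  messages,                   // CoreMessage[]
  (text, _model, usage) => {
    responseText = text;
    if (usage) totalTokens += usage.totalTokens; // usage is undefined when the provider reports none
  },
  undefined,                  // provider-specific options
  'low',                      // complexity
);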
/**
* Determines if a given model is proprietary (OpenAI, Anthropic, Google, Grok)
* or open source (accessed via Bedrock, Ollama, etc.)
*/
export function isProprietaryModel(modelName?: string, complexity: ModelComplexity = 'high'): boolean {
const model = modelName || getModelForTask(complexity);
if (!model) return false;
// Proprietary model patterns
const proprietaryPatterns = [
/^gpt-/, // OpenAI models
/^claude-/, // Anthropic models
/^gemini-/, // Google models
/^grok-/, // xAI models
];
return proprietaryPatterns.some(pattern => pattern.test(model));
}
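Illustrative results based on the patterns above (the second argument only matters when no model name is passed):

isProprietaryModel('gpt-5-mini-2025-08-07');        // true: matches /^gpt-/
isProprietaryModel('us.amazon.nova-premier-v1:0');  // false: Bedrock-hosted, no pattern match
isProprietaryModel(undefined, 'high');              // checks whatever getModelForTask('high') returns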
export async function getEmbedding(text: string) {
const ollamaUrl = process.env.OLLAMA_URL;

View File

@ -19,6 +19,7 @@ import {
} from "./prompts/nodes";
import {
extractStatements,
extractStatementsOSS,
resolveStatementPrompt,
} from "./prompts/statements";
import {
@ -39,7 +40,7 @@ import {
saveTriple,
searchStatementsByEmbedding,
} from "./graphModels/statement";
import { getEmbedding, makeModelCall } from "~/lib/model.server";
import { getEmbedding, makeModelCall, isProprietaryModel } from "~/lib/model.server";
import { runQuery } from "~/lib/neo4j.server";
import { Apps, getNodeTypesString } from "~/utils/presets/nodes";
import { normalizePrompt, normalizeDocumentPrompt } from "./prompts";
@ -228,10 +229,20 @@ export class KnowledgeGraphService {
episodeUuid: string | null;
statementsCreated: number;
processingTimeMs: number;
tokenUsage?: {
high: { input: number; output: number; total: number };
low: { input: number; output: number; total: number };
};
}> {
const startTime = Date.now();
const now = new Date();
// Track token usage by complexity
const tokenMetrics = {
high: { input: 0, output: 0, total: 0 },
low: { input: 0, output: 0, total: 0 },
};
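Each makeModelCall callback below folds the reported usage into one of these two buckets. As a sketch, the repeated inline pattern is equivalent to a helper like this (addUsage is hypothetical and not part of the diff; TokenUsage is the type exported from model.server):

function addUsage(bucket: { input: number; output: number; total: number }, usage: TokenUsage) {
  bucket.input += usage.promptTokens;
  bucket.output += usage.completionTokens;
  bucket.total += usage.totalTokens;
}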
try {
// Step 1: Context Retrieval - Get previous episodes for context
const previousEpisodes = await getRecentEpisodes({
@ -258,6 +269,7 @@ export class KnowledgeGraphService {
params.source,
params.userId,
prisma,
tokenMetrics,
new Date(params.referenceTime),
sessionContext,
params.type,
@ -295,6 +307,7 @@ export class KnowledgeGraphService {
const extractedNodes = await this.extractEntities(
episode,
previousEpisodes,
tokenMetrics,
);
console.log(extractedNodes.map((node) => node.name));
@ -316,6 +329,7 @@ export class KnowledgeGraphService {
episode,
categorizedEntities,
previousEpisodes,
tokenMetrics,
);
const extractedStatementsTime = Date.now();
@ -328,6 +342,7 @@ export class KnowledgeGraphService {
extractedStatements,
episode,
previousEpisodes,
tokenMetrics,
);
const resolvedTriplesTime = Date.now();
@ -341,6 +356,7 @@ export class KnowledgeGraphService {
resolvedTriples,
episode,
previousEpisodes,
tokenMetrics,
);
const resolvedStatementsTime = Date.now();
@ -407,6 +423,7 @@ export class KnowledgeGraphService {
// nodesCreated: hydratedNodes.length,
statementsCreated: resolvedStatements.length,
processingTimeMs,
tokenUsage: tokenMetrics,
};
} catch (error) {
console.error("Error in addEpisode:", error);
@ -420,6 +437,7 @@ export class KnowledgeGraphService {
private async extractEntities(
episode: EpisodicNode,
previousEpisodes: EpisodicNode[],
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
): Promise<EntityNode[]> {
// Use the prompt library to get the appropriate prompts
const context = {
@ -436,9 +454,15 @@ export class KnowledgeGraphService {
let responseText = "";
await makeModelCall(false, messages as CoreMessage[], (text) => {
// Entity extraction requires HIGH complexity (creative reasoning, nuanced NER)
await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => {
responseText = text;
});
if (usage) {
tokenMetrics.high.input += usage.promptTokens;
tokenMetrics.high.output += usage.completionTokens;
tokenMetrics.high.total += usage.totalTokens;
}
}, undefined, 'high');
// Convert to EntityNode objects
let entities: EntityNode[] = [];
@ -447,19 +471,21 @@ export class KnowledgeGraphService {
if (outputMatch && outputMatch[1]) {
responseText = outputMatch[1].trim();
const extractedEntities = JSON.parse(responseText || "{}").entities || [];
const parsedResponse = JSON.parse(responseText || "[]");
// Handle both old format {entities: [...]} and new format [...]
const extractedEntities = Array.isArray(parsedResponse) ? parsedResponse : (parsedResponse.entities || []);
// Batch generate embeddings for entity names
const entityNames = extractedEntities.map((entity: any) => entity.name);
const entityNames = Array.isArray(extractedEntities[0]) ? extractedEntities : extractedEntities.map((entity: any) => entity.name || entity);
const nameEmbeddings = await Promise.all(
entityNames.map((name: string) => this.getEmbedding(name)),
);
entities = extractedEntities.map((entity: any, index: number) => ({
uuid: crypto.randomUUID(),
name: entity.name,
name: typeof entity === 'string' ? entity : entity.name,
type: undefined, // Type will be inferred from statements
attributes: entity.attributes || {},
attributes: typeof entity === 'string' ? {} : (entity.attributes || {}),
nameEmbedding: nameEmbeddings[index],
typeEmbedding: undefined, // No type embedding needed
createdAt: new Date(),
@ -481,6 +507,7 @@ export class KnowledgeGraphService {
expanded: EntityNode[];
},
previousEpisodes: EpisodicNode[],
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
): Promise<Triple[]> {
// Use the prompt library to get the appropriate prompts
const context = {
@ -502,13 +529,21 @@ export class KnowledgeGraphService {
referenceTime: episode.validAt.toISOString(),
};
// Get the statement extraction prompt from the prompt library
const messages = extractStatements(context);
// Statement extraction requires HIGH complexity (causal reasoning, emotional context)
// Choose between proprietary and OSS prompts based on model type
const messages = isProprietaryModel(undefined, 'high')
? extractStatements(context)
: extractStatementsOSS(context);
let responseText = "";
await makeModelCall(false, messages as CoreMessage[], (text) => {
await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => {
responseText = text;
});
if (usage) {
tokenMetrics.high.input += usage.promptTokens;
tokenMetrics.high.output += usage.completionTokens;
tokenMetrics.high.total += usage.totalTokens;
}
}, undefined, 'high');
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
if (outputMatch && outputMatch[1]) {
@ -518,8 +553,11 @@ export class KnowledgeGraphService {
}
// Parse the statements from the LLM response
const extractedTriples: ExtractedTripleData[] =
JSON.parse(responseText || "{}").edges || [];
const parsedResponse = JSON.parse(responseText || "[]");
// Handle both old format {"edges": [...]} and new format [...]
const extractedTriples: ExtractedTripleData[] = Array.isArray(parsedResponse)
? parsedResponse
: (parsedResponse.edges || []);
console.log(`extracted triples length: ${extractedTriples.length}`);
@ -639,6 +677,7 @@ export class KnowledgeGraphService {
triples: Triple[],
episode: EpisodicNode,
previousEpisodes: EpisodicNode[],
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
): Promise<Triple[]> {
// Step 1: Extract unique entities from triples
const uniqueEntitiesMap = new Map<string, EntityNode>();
@ -764,9 +803,15 @@ export class KnowledgeGraphService {
const messages = dedupeNodes(dedupeContext);
let responseText = "";
await makeModelCall(false, messages as CoreMessage[], (text) => {
// Entity deduplication is LOW complexity (pattern matching, similarity comparison)
await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => {
responseText = text;
});
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
}, undefined, 'low');
// Step 5: Process LLM response
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
@ -847,6 +892,7 @@ export class KnowledgeGraphService {
triples: Triple[],
episode: EpisodicNode,
previousEpisodes: EpisodicNode[],
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
): Promise<{
resolvedStatements: Triple[];
invalidatedStatements: string[];
@ -999,10 +1045,15 @@ export class KnowledgeGraphService {
let responseText = "";
// Call the LLM to analyze all statements at once
await makeModelCall(false, messages, (text) => {
// Statement resolution is LOW complexity (rule-based duplicate/contradiction detection)
await makeModelCall(false, messages, (text, _model, usage) => {
responseText = text;
});
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
}, undefined, 'low');
try {
// Extract the JSON response from the output tags
@ -1083,6 +1134,7 @@ export class KnowledgeGraphService {
private async addAttributesToEntities(
triples: Triple[],
episode: EpisodicNode,
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
): Promise<Triple[]> {
// Collect all unique entities from the triples
const entityMap = new Map<string, EntityNode>();
@ -1122,10 +1174,15 @@ export class KnowledgeGraphService {
let responseText = "";
// Call the LLM to extract attributes
await makeModelCall(false, messages as CoreMessage[], (text) => {
// Attribute extraction is LOW complexity (simple key-value extraction)
await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => {
responseText = text;
});
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
}, undefined, 'low');
try {
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
@ -1163,6 +1220,7 @@ export class KnowledgeGraphService {
source: string,
userId: string,
prisma: PrismaClient,
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
episodeTimestamp?: Date,
sessionContext?: string,
contentType?: EpisodeType,
@ -1197,10 +1255,16 @@ export class KnowledgeGraphService {
contentType === EpisodeTypeEnum.DOCUMENT
? normalizeDocumentPrompt(context)
: normalizePrompt(context);
// Normalization is LOW complexity (text cleaning and standardization)
let responseText = "";
await makeModelCall(false, messages, (text) => {
await makeModelCall(false, messages, (text, _model, usage) => {
responseText = text;
});
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
}, undefined, 'low');
let normalizedEpisodeBody = "";
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
if (outputMatch && outputMatch[1]) {

View File

@ -45,19 +45,40 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- Personal experience descriptions
- Memory/reflection statements
3. **Type and Concept Entity Extraction**:
3. **NAMED ENTITY EXTRACTION**:
- **PEOPLE NAMES**: Extract all proper names of individuals (e.g., "Luna", "Albert", "John Smith")
- **ORGANIZATION NAMES**: Extract company/brand names (e.g., "SUSE", "Albert Heijn", "TEEKS", "Google")
- **PLACE NAMES**: Extract specific locations (e.g., "Amstelveen", "Bruges", "Eze", "Netherlands", "Europe")
- **PRODUCT/SERVICE NAMES**: Extract named products, services, or systems (e.g., "iPhone", "Tesla Model S")
- **EVENT NAMES**: Extract named events, conferences, or specific occasions
4. **MEASUREMENT & QUANTITATIVE EXTRACTION**:
- **NUMERICAL RATINGS**: Extract rating values and scores (e.g., "10/10", "8.5/10", "5-star")
- **PRICES & CURRENCY**: Extract monetary values (e.g., "₹40 crore", "$100", "€50")
- **QUANTITIES**: Extract specific measurements (e.g., "5 kilometers", "3 months", "2 hours")
- **PERCENTAGES**: Extract percentage values (e.g., "85%", "half", "majority")
- **QUALITY DESCRIPTORS**: Extract qualitative ratings (e.g., "excellent", "poor", "outstanding")
5. **CULTURAL & ABSTRACT CONCEPT EXTRACTION**:
- **CULTURAL CONCEPTS**: Extract cultural ideas, traditions, or practices mentioned
- **PROCESS CONCEPTS**: Extract named processes, methodologies, or systems
- **ABSTRACT IDEAS**: Extract philosophical, emotional, or conceptual entities
- **DOMAINS & FIELDS**: Extract subject areas, industries, or fields of knowledge
- **STANDARDS & FRAMEWORKS**: Extract methodologies, standards, or organizational frameworks
6. **Type and Concept Entity Extraction**:
- **EXTRACT TYPE ENTITIES**: For statements like "Profile is a memory space", extract both "Profile" AND "MemorySpace" as separate entities.
- **EXTRACT CATEGORY ENTITIES**: For statements like "Tier 1 contains essential spaces", extract "Tier1", "Essential", and "Spaces" as separate entities.
- **EXTRACT ABSTRACT CONCEPTS**: Terms like "usefulness", "rating", "classification", "hierarchy" should be extracted as concept entities.
- **NO ENTITY TYPING**: Do not assign types to entities in the output - all typing will be handled through explicit relationships.
4. **Exclusions**:
7. **Exclusions**:
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
- **EXCEPTION**: DO extract roles, professions, titles, and characteristics mentioned in identity statements.
- Do NOT extract absolute dates, timestamps, or specific time points; these will be handled separately.
- Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm").
5. **Entity Name Extraction**:
8. **Entity Name Extraction**:
- Extract ONLY the core entity name, WITHOUT any descriptors or qualifiers
- When text mentions "Tesla car", extract TWO entities: "Tesla" AND "Car"
- When text mentions "memory space system", extract "Memory", "Space", AND "System" as separate entities
@ -66,7 +87,7 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
- **CONCEPT NORMALIZATION**: Convert to singular form where appropriate ("spaces" → "Space")
6. **Temporal and Relationship Context Extraction**:
9. **Temporal and Relationship Context Extraction**:
- EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years")
- EXTRACT temporal context that anchors relationships ("since moving", "after graduation", "during college")
- EXTRACT relationship qualifiers ("close friends", "support system", "work team", "family members")
@ -88,6 +109,30 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- Text: "essential classification tier" Extract: "Essential", "Classification", "Tier"
- Text: "hierarchical memory system" Extract: "Hierarchical", "Memory", "System"
**NAMED ENTITY EXAMPLES:**
**PEOPLE & ORGANIZATIONS:**
- Text: "Sarah joined Meta last year" Extract: "Sarah", "Meta"
- Text: "Meeting with David from OpenAI" Extract: "David", "OpenAI"
- Text: "Dr. Chen works at Stanford Research" Extract: "Dr. Chen", "Stanford Research"
- Text: "Amazon's new initiative" Extract: "Amazon", "Initiative"
**PLACES & LOCATIONS:**
- Text: "Conference in Tokyo this summer" Extract: "Conference", "Tokyo"
- Text: "Moving from Portland to Austin" Extract: "Portland", "Austin"
- Text: "Remote office in Berlin" Extract: "Remote Office", "Berlin"
**MEASUREMENTS & QUANTITATIVE:**
- Text: "Project scored 9/10" Extract: "Project", "9/10"
- Text: "Budget of $2.5 million" Extract: "Budget", "$2.5 million"
- Text: "Outstanding performance" Extract: "Performance", "Outstanding"
- Text: "75% completion rate" Extract: "Completion Rate", "75%"
**CULTURAL & ABSTRACT CONCEPTS:**
- Text: "Lean startup methodology" Extract: "Lean Startup", "Methodology"
- Text: "Zen meditation practice" Extract: "Zen", "Meditation", "Practice"
- Text: "DevOps culture transformation" Extract: "DevOps", "Culture", "Transformation"
**TEMPORAL INFORMATION - What to EXTRACT vs EXCLUDE:**
**EXTRACT - Relationship Temporal Information:**
@ -123,20 +168,24 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
- Text: "10/10 usefulness rating" Extract: "Usefulness", "Rating"
**INCORRECT:**
- Text: "Profile is a memory space" Only extract: "Profile"
- Text: "Profile is a memory space" Only extract: "Profile"
- Text: "authentication system" Extract: "authentication system" (should be "Authentication", "System")
- Text: "payment service" Extract: "payment service" (should be "Payment", "Service")
## CRITICAL OUTPUT FORMAT REQUIREMENTS:
**YOU MUST STRICTLY FOLLOW THIS EXACT FORMAT:**
<output>
{
"entities": [
{
"name": "Entity Name"
}
// Additional entities...
]
}
</output>`;
["Entity 1", "Entity 2", "Entity 3", ...]
</output>
**MANDATORY RULES:**
1. Start with exactly: <output>
2. Simple JSON array of entity names only
3. Each entity as a string: "EntityName"
4. End with exactly: </output>
5. NO additional text, NO comments, NO explanations`;
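For context, the consuming extractEntities code shown earlier tolerates both the old object shape and this new array shape. A condensed sketch of that parsing, where responseText is the raw LLM reply:

const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
const parsed = JSON.parse(outputMatch?.[1]?.trim() || "[]");
// New format: ["Entity 1", ...]; old format: { "entities": [{ "name": "Entity 1" }, ...] }
const extractedEntities = Array.isArray(parsed) ? parsed : (parsed.entities || []);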
const contentLabel = extractionMode === 'conversation' ? 'CURRENT EPISODE' : 'TEXT';
const userPrompt = `
@ -226,6 +275,15 @@ Format your response as follows:
}
</output>
## CRITICAL OUTPUT FORMAT REQUIREMENTS:
**YOU MUST STRICTLY FOLLOW THESE FORMAT RULES:**
1. **ALWAYS use <output> tags** - Never use any other tag format
2. **ONLY output valid JSON** within the <output> tags
3. **NO additional text** before or after the <output> tags
4. **NO comments** inside the JSON
5. **REQUIRED structure:** Must follow exact JSON schema shown above
## Important Instructions:
- Always include all entities from the input in your response
- Always wrap the output in these tags <output> </output>
@ -272,20 +330,26 @@ Common attribute types to consider:
- Temporal information (duration, frequency, etc.)
- Qualitative aspects (importance, preference, etc.)
Provide your output in this structure:
## CRITICAL OUTPUT FORMAT REQUIREMENTS:
**YOU MUST STRICTLY FOLLOW THESE FORMAT RULES:**
1. **ALWAYS use <output> tags** - Never use any other tag format
2. **ONLY output valid JSON** within the <output> tags
3. **NO additional text** before or after the <output> tags
4. **NO comments** inside the JSON
5. **REQUIRED structure:** Must follow exact JSON schema shown below
<output>
{
"entities": [
{
"uuid": "entity-uuid",
"attributes": {
"attributeName1": "value1",
"attributeName2": "value2",
...
}
},
...
]
"entities": [
{
"uuid": "entity-uuid",
"attributes": {
"attributeName1": "value1",
"attributeName2": "value2"
}
}
]
}
</output>`;

View File

@ -150,26 +150,21 @@ CRITICAL TEMPORAL INFORMATION HANDLING:
- "event_date": "[resolved ISO date ~1 month after episode date, e.g., '2023-07-27T00:00:00.000Z']"
- "temporal_context": "next month"
Format your response as a JSON object with the following structure:
Format your response as a JSON array with the following structure:
<output>
{
"edges": [
{
"source": "[Subject Entity Name - MUST be from AVAILABLE ENTITIES]",
"predicate": "[Relationship Type]",
"target": "[Object Entity Name - MUST be from AVAILABLE ENTITIES]",
"fact": "[Natural language representation of the fact]",
"attributes": {
"confidence": confidence of the fact,
"source": "explicit or implicit source type",
"event_date": "ISO date when the fact/event actually occurred (if applicable)",
"temporal_context": "original temporal description (e.g., 'last week', 'recently')",
"duration": "duration information from Duration entities (e.g., '4 years', '2 months')",
"context": "contextual information from TemporalContext entities (e.g., 'since moving', 'after breakup')"
}
[
{
"source": "[Subject Entity Name - MUST be from AVAILABLE ENTITIES]",
"predicate": "[Relationship Type]",
"target": "[Object Entity Name - MUST be from AVAILABLE ENTITIES]",
"fact": "[Natural language representation of the fact]",
"attributes": {
"event_date": "ISO date when the fact/event actually occurred (if applicable)",
"duration": "duration information from Duration entities (e.g., '4 years', '2 months')",
"context": "contextual information from TemporalContext entities (e.g., 'since moving', 'after breakup')"
}
]
}
}
]
</output>
IMPORTANT RULES:
@ -227,6 +222,298 @@ ${JSON.stringify(context.entities.expanded, null, 2)}
];
};
export const extractStatementsOSS = (
context: Record<string, any>,
): CoreMessage[] => {
return [
{
role: "system",
content: `## WHO→WHAT→WHOM INSTRUCTIONS
**WHO**: You are a knowledge graph extraction expert specializing in relationship identification
**WHAT**: Extract factual statements from text as subject-predicate-object triples for knowledge graph construction
**WHOM**: For the CORE memory system that helps AI tools maintain persistent, structured knowledge
## CONTEXTUAL EXTRACTION PROCESS
Think through this systematically with **NARRATIVE CONTEXT** awareness:
**STEP 1: UNDERSTAND THE EPISODE CONTEXT**
- What is the main conversation/topic about? (e.g., "entity extraction optimization", "travel journal analysis")
- What is the PURPOSE of this content? (e.g., "improving AI performance", "documenting experiences")
- What PROCESS is happening? (e.g., "testing new examples", "implementing features")
**STEP 2: IDENTIFY ACTORS WITH CONTEXT**
- Who are the people, entities, or agents mentioned?
- WHY are they mentioned? (examples in prompt, participants in process, subjects of discussion)
- What ROLE do they play in this context? (test cases, real people, organizational entities)
**STEP 3: ANALYZE ACTIONS & EXPERIENCES WITH PURPOSE**
- What actions did actors perform? (traveled, worked, created)
- What did actors experience? (felt, observed, encountered)
- What states did actors have? (lived in, owned, knew)
- **CRITICALLY**: WHY are these actions/experiences being discussed? (examples, optimizations, improvements)
**STEP 4: FIND CAUSAL CONNECTIONS & CONTEXTUAL SIGNIFICANCE**
- What caused what? (event emotion, condition outcome)
- How did events make actors FEEL? (forgotten item anxiety, beauty appreciation)
- What influenced decisions? (experience preference, problem solution)
- **KEY**: How do these relationships serve the larger context/purpose?
**STEP 5: CAPTURE TEMPORAL & EPISODE LINKAGE**
- When did events occur? (dates, sequences, durations)
- Where did actions happen? (locations, contexts)
- What were the circumstances? (conditions, motivations)
- **EPISODE CONNECTION**: How does this relate to the ongoing conversation/process?
**STEP 6: FORM CONTEXT-AWARE RELATIONSHIPS**
- Use actors, actions, and objects from above steps
- **ENHANCE** with contextual significance (WHY this relationship matters)
- Include episode provenance in natural language fact descriptions
- Ensure each relationship tells a meaningful story WITH context
## PHASE 1: FOUNDATIONAL RELATIONSHIPS (HIGHEST PRIORITY)
Extract the basic semantic backbone that answers: WHO, WHAT, WHERE, WHEN, WHY, HOW
### 1A: ACTOR-ACTION RELATIONSHIPS
- Subject performs action: "Entity" "performed" "Action"
- Subject experiences state: "Entity" "experienced" "State"
- Subject has attribute: "Entity" "has" "Property"
- Subject creates/produces: "Entity" "created" "Object"
### 1B: SPATIAL & HIERARCHICAL RELATIONSHIPS
- Location membership: "Entity" "located_in" "Location"
- Categorical membership: "Entity" "is_a" "Category"
- Hierarchical structure: "Entity" "part_of" "System"
- Containment: "Container" "contains" "Item"
### 1C: TEMPORAL & SEQUENTIAL RELATIONSHIPS
- Duration facts: "Event" "lasted" "Duration"
- Sequence facts: "Event" "occurred_before" "Event"
- Temporal anchoring: "Event" "occurred_during" "Period"
- Timing: "Action" "happened_on" "Date"
### 1D: SUBJECTIVE & EVALUATIVE RELATIONSHIPS
- Opinions: "Subject" "opinion_about" "Object"
- Preferences: "Subject" "prefers" "Object"
- Evaluations: "Subject" "rated" "Object"
- Desires: "Subject" "wants" "Object"
## SYSTEMATIC EXTRACTION PATTERNS
**Type/Category**: Entity is_a Type
**Ownership**: Actor owns/controls Resource
**Participation**: Actor participates_in Event
**Location**: Entity located_in/part_of Place
**Temporal**: Event occurred_during TimeFrame
**Rating/Measurement**: Subject rated/measured Object
**Reference**: Document references Entity
**Employment**: Person works_for Organization
## RELATIONSHIP QUALITY HIERARCHY
## RELATIONSHIP TEMPLATES (High Priority)
**NARRATIVE RELATIONSHIPS:**
- "Actor" "experienced" "Emotion/State"
- "Actor" "appreciated" "Aspect"
- "Actor" "found" "Subject" "Evaluation"
- "Actor" "felt" "Emotion" "about" "Subject"
**CAUSAL & EMOTIONAL RELATIONSHIPS:**
- "Event" "caused" "Actor" "to feel" "Emotion"
- "Experience" "made" "Actor" "appreciate" "Aspect"
- "Problem" "led to" "Actor" "feeling" "Frustration"
- "Beauty" "evoked" "Actor's" "Sense of wonder"
- "Difficulty" "resulted in" "Actor" "seeking" "Solution"
- "Success" "boosted" "Actor's" "Confidence"
**CROSS-EVENT RELATIONSHIPS:**
- "Experience A" "influenced" "Actor's view of" "Experience B"
- "Previous trip" "shaped" "Actor's" "Travel expectations"
- "Cultural encounter" "changed" "Actor's" "Perspective on" "Topic"
- "Mistake" "taught" "Actor" "to avoid" "Similar situation"
**TEMPORAL RELATIONSHIPS:**
- "Actor" "spent" "Duration" "doing" "Activity"
- "Event" "occurred during" "TimeFrame"
- "Actor" "planned" "FutureAction"
- "Experience" "happened before" "Decision"
**ESSENTIAL (Extract Always)**:
- Categorical membership (is_a, type_of)
- Spatial relationships (located_in, part_of)
- Actor-action relationships (performed, experienced, created)
- Ownership/control relationships (owns, controls, manages)
- Employment relationships (works_for, employed_by)
**VALUABLE (Extract When Present)**:
- Temporal sequences and durations
- Subjective opinions and evaluations
- Cross-references and citations
- Participation and attendance
**CONTEXTUAL (Extract If Space Permits)**:
- Complex multi-hop inferences
- Implicit relationships requiring interpretation
CRITICAL REQUIREMENT:
- You MUST ONLY use entities from the AVAILABLE ENTITIES list as subjects and objects.
- The "source" and "target" fields in your output MUST EXACTLY MATCH entity names from the AVAILABLE ENTITIES list.
- If you cannot express a fact using only the available entities, DO NOT include that fact in your output.
- DO NOT create, invent, or modify any entity names.
- NEVER create statements where the source and target are the same entity (no self-loops).
ENTITY PRIORITIZATION:
- **PRIMARY ENTITIES**: Directly extracted from the current episode - these are your main focus
- **EXPANDED ENTITIES**: From related contexts - only use if they're explicitly mentioned or contextually relevant
RELATIONSHIP FORMATION RULES:
1. **PRIMARY-PRIMARY**: Always consider relationships between primary entities
2. **PRIMARY-EXPANDED**: Only if the expanded entity is mentioned in the episode content
3. **EXPANDED-EXPANDED**: Avoid unless there's explicit connection in the episode
INSTRUCTIONS:
1. **SYSTEMATIC ANALYSIS**: Check all foundational relationship patterns for each entity
2. **PATTERN COMPLETION**: If pattern exists for one entity, verify coverage for all applicable entities
3. **SAME-NAME ENTITIES**: Connect entities with identical names but different types
4. **STRUCTURAL FOUNDATION**: Prioritize basic relationships over complex interpretations
## SAME-NAME ENTITY RELATIONSHIP FORMATION
When entities share identical names but have different types, CREATE explicit relationship statements:
- **Person-Organization**: "John (Person)" "owns", "founded", "works for", or "leads" "John (Company)"
- **Person-Location**: "Smith (Person)" "lives in", "founded", or "is associated with" "Smith (City)"
- **Event-Location**: "Conference (Event)" "takes place at" or "is hosted by" "Conference (Venue)"
- **Product-Company**: "Tesla (Product)" "is manufactured by" or "is developed by" "Tesla (Company)"
- **MANDATORY**: Always create at least one relationship for same-name entities
## DURATION AND TEMPORAL CONTEXT ENTITY USAGE
When Duration or TemporalContext entities are available in AVAILABLE ENTITIES:
- **Duration entities** (e.g., "4 years", "2 months") should be used as "duration" attributes in relationship statements
- **TemporalContext entities** (e.g., "since moving", "after breakup") should be used as "temporal_context" attributes
- **DO NOT** use Duration/TemporalContext entities as direct subjects or objects in relationships
- **DO USE** them to enrich relationship statements with temporal information
EXAMPLE: If AVAILABLE ENTITIES = ["Caroline", "friends", "4 years", "since moving"]:
"Caroline" "has known" "friends" [attributes: {"duration": "4 years", "temporal_context": "since moving"}]
"Caroline" "relates to" "4 years" (Duration as direct object)
"since moving" "describes" "friendship" (TemporalContext as direct subject)
## EXTRACTION PRINCIPLES
- Extract obvious structural relationships (not redundant noise)
- Prioritize simple over complex: "X is_in Y" > "X contextually_relates_to Y"
- Comprehensive coverage over selective "interesting" facts
- If pattern exists for one entity, check ALL entities for same pattern
- Skip only exact duplicates, not similar relationship types
## TEMPORAL INFORMATION HANDLING
- Capture temporal information in statement attributes (not as separate entities)
- **event_date**: When fact/event actually occurred (resolve using REFERENCE_TIME)
- **temporal_context**: Temporal descriptions ("last week", "recently")
EXAMPLES:
- "Max married Tina on January 14" {"event_date": "January 14", "temporal_context": "specific date"}
- "went camping last week" {"event_date": "[ISO date ~7 days before REFERENCE_TIME]", "temporal_context": "last week"}
- "going to Paris next month" {"event_date": "[ISO date ~1 month after REFERENCE_TIME]", "temporal_context": "next month"}
Format your response as a JSON array with the following structure:
<output>
[
{
"source": "[Subject Entity Name - MUST be from AVAILABLE ENTITIES]",
"predicate": "[Relationship Type]",
"target": "[Object Entity Name - MUST be from AVAILABLE ENTITIES]",
"fact": "[Natural language representation of the fact]",
"attributes": {
"event_date": "ISO date when the fact/event actually occurred (if applicable)",
"duration": "duration information from Duration entities (e.g., '4 years', '2 months')",
"context": "contextual information from TemporalContext entities (e.g., 'since moving', 'after breakup')"
}
}
]
</output>
IMPORTANT RULES:
- **ENTITIES**: ONLY use entities from AVAILABLE ENTITIES as source and target
- **NO INVENTION**: NEVER create statements where source or target is not in AVAILABLE ENTITIES
- **NO SELF-LOOPS**: NEVER create statements where the source and target are the same entity
- **SAME-NAME PRIORITY**: When entities share names but have different types, CREATE explicit relationship statements between them
- **NEW ONLY**: Do NOT create statements that duplicate relationships already present in previous episodes
- **TEMPORAL**: Instead of creating self-loops for temporal information, add timespan attributes to relevant statements
- **FILTER FIRST**: If you cannot express a NEW fact using only available entities, omit it entirely
- **OUTPUT FORMAT**: Always wrap output in tags <output> </output>
## QUALITY EXAMPLES
**INPUT**: "The sunset was beautiful. I felt peaceful watching it."
**GOOD OUTPUT** (Rich relationships):
"Author" "observed" "sunset"
"Author" "experienced" "peaceful feeling"
"Beautiful sunset" "caused" "Author" "to feel peaceful"
"Author" "found" "sunset" "beautiful"
**POOR OUTPUT** (Isolated facts):
"Sunset" "was" "beautiful"
"Feeling" "was" "peaceful"
**INPUT**: "I forgot my credit card at the store and had to go back. I felt so frustrated!"
**GOOD OUTPUT** (Enhanced with emotions & causality):
"Author" "forgot" "credit card"
"Author" "left" "credit card" "at store"
"Forgotten credit card" "caused" "Author" "to feel" "frustrated"
"Forgotten credit card" "forced" "Author" "to return"
"Author" "experienced" "inconvenience"
"Mistake" "resulted in" "Author" "learning" "to be more careful"
**INPUT**: "The museum was incredible. It reminded me of my trip to Rome last year."
**GOOD OUTPUT** (Cross-event relationships):
"Author" "visited" "museum"
"Author" "found" "museum" "incredible"
"Museum experience" "reminded" "Author" "of Rome trip"
"Previous Rome trip" "shaped" "Author's" "museum appreciation"
"Author" "made" "cross-cultural connection"
**ENHANCED VERIFICATION CHECKLIST:**
Did I capture the actor's subjective experience and emotions?
Are there causal relationships showing what caused feelings/decisions?
Did I include how experiences influenced the actor's perspective?
Are there connections between different events or experiences?
Did I capture both immediate reactions AND longer-term impacts?
Are there temporal sequences, cross-references, or learning moments?
CORRECT TECHNICAL EXAMPLES:
"Person" "is" "Role" (categorical relationship)
"Caroline" "has known" "friends" [attributes: {"duration": "4 years", "context": "since moving"}]
INCORRECT TECHNICAL EXAMPLES:
"John" "attends" "Party" (if "Party" not in AVAILABLE ENTITIES)
"Marriage" "occurs on" "Marriage" (self-loops prohibited)`,
},
{
role: "user",
content: `
<EPISODE_CONTENT>
${context.episodeContent}
</EPISODE_CONTENT>
<PREVIOUS_EPISODES>
${JSON.stringify(context.previousEpisodes, null, 2)}
</PREVIOUS_EPISODES>
<AVAILABLE_ENTITIES>
<PRIMARY_ENTITIES>
${JSON.stringify(context.entities.primary, null, 2)}
</PRIMARY_ENTITIES>
<EXPANDED_ENTITIES>
${JSON.stringify(context.entities.expanded, null, 2)}
</EXPANDED_ENTITIES>
</AVAILABLE_ENTITIES>
`,
},
];
};
/**
* Analyze similar statements to determine duplications and contradictions
* This prompt helps the LLM evaluate semantically similar statements found through vector search

View File

@ -829,11 +829,11 @@ async function processBatch(
userId,
);
// Call LLM for space assignments
// Space assignment is LOW complexity (rule-based classification with confidence scores)
let responseText = "";
await makeModelCall(false, prompt, (text: string) => {
responseText = text;
});
}, undefined, 'low');
// Response text is now set by the callback

View File

@ -265,10 +265,11 @@ async function extractExplicitPatterns(
const prompt = createExplicitPatternPrompt(themes, summary, statements);
// Pattern extraction requires HIGH complexity (insight synthesis, pattern recognition)
let responseText = "";
await makeModelCall(false, prompt, (text: string) => {
responseText = text;
});
}, undefined, 'high');
const patterns = parseExplicitPatternResponse(responseText);
@ -290,10 +291,11 @@ async function extractImplicitPatterns(
const prompt = createImplicitPatternPrompt(statements);
// Implicit pattern discovery requires HIGH complexity (pattern recognition from statements)
let responseText = "";
await makeModelCall(false, prompt, (text: string) => {
responseText = text;
});
}, undefined, 'high');
const patterns = parseImplicitPatternResponse(responseText);

View File

@ -341,10 +341,11 @@ async function generateUnifiedSummary(
previousThemes,
);
// Space summary generation requires HIGH complexity (creative synthesis, narrative generation)
let responseText = "";
await makeModelCall(false, prompt, (text: string) => {
responseText = text;
});
}, undefined, 'high');
return parseSummaryResponse(responseText);
} catch (error) {

View File

@ -14,12 +14,14 @@
"trigger:deploy": "pnpm dlx trigger.dev@4.0.0-v4-beta.22 deploy"
},
"dependencies": {
"@ai-sdk/amazon-bedrock": "2.2.12",
"@ai-sdk/anthropic": "^1.2.12",
"@ai-sdk/google": "^1.2.22",
"@ai-sdk/openai": "^1.3.21",
"@aws-sdk/client-s3": "3.879.0",
"@aws-sdk/s3-request-presigner": "3.879.0",
"@anthropic-ai/sdk": "^0.60.0",
"@aws-sdk/client-s3": "3.879.0",
"@aws-sdk/credential-providers": "^3.894.0",
"@aws-sdk/s3-request-presigner": "3.879.0",
"@coji/remix-auth-google": "^4.2.0",
"@conform-to/react": "^0.6.1",
"@conform-to/zod": "^0.6.1",
@ -78,7 +80,7 @@
"@tiptap/starter-kit": "2.11.9",
"@trigger.dev/react-hooks": "4.0.0-v4-beta.22",
"@trigger.dev/sdk": "4.0.0-v4-beta.22",
"ai": "4.3.14",
"ai": "4.3.19",
"axios": "^1.10.0",
"bullmq": "^5.53.2",
"cheerio": "^1.1.2",
@ -120,6 +122,7 @@
"react": "^18.2.0",
"react-calendar-heatmap": "^1.10.0",
"react-dom": "^18.2.0",
"react-markdown": "10.1.0",
"react-resizable-panels": "^1.0.9",
"react-virtualized": "^9.22.6",
"remix-auth": "^4.2.0",
@ -127,7 +130,6 @@
"remix-themes": "^2.0.4",
"remix-typedjson": "0.3.1",
"remix-utils": "^7.7.0",
"react-markdown": "10.1.0",
"sigma": "^3.0.2",
"simple-oauth2": "^5.1.0",
"tailwind-merge": "^2.6.0",
@ -135,7 +137,7 @@
"tailwindcss-animate": "^1.0.7",
"tailwindcss-textshadow": "^2.1.3",
"tiny-invariant": "^1.3.1",
"zod": "3.23.8",
"zod": "3.25.76",
"zod-error": "1.5.0",
"zod-validation-error": "^1.5.0"
},
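For reference, the new runtime dependencies here (@ai-sdk/amazon-bedrock and @aws-sdk/credential-providers) would typically be installed with something like pnpm add @ai-sdk/amazon-bedrock @aws-sdk/credential-providers, with the ai and zod version bumps pulled in alongside.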

pnpm-lock.yaml (generated): 1441 changes; diff suppressed because it is too large.

View File

@ -79,6 +79,9 @@
"RESEND_API_KEY",
"FROM_EMAIL",
"REPLY_TO_EMAIL",
"EMAIL_TRANSPORT"
"EMAIL_TRANSPORT",
"AWS_REGION",
"AWS_ACCESS_KEY_ID",
"AWS_SECRET_ACCESS_KEY"
]
}