import { type CoreMessage } from "ai";
import {
  type ExtractedTripleData,
  type AddEpisodeParams,
  type EntityNode,
  type EpisodicNode,
  type StatementNode,
  type Triple,
  EpisodeTypeEnum,
  type EpisodeType,
} from "@core/types";
import { logger } from "./logger.service";
import { ClusteringService } from "./clustering.server";
import crypto from "crypto";
import {
  dedupeNodes,
  extractAttributes,
  extractEntities,
} from "./prompts/nodes";
import {
  extractStatements,
  resolveStatementPrompt,
} from "./prompts/statements";
import {
  getEpisodeStatements,
  getRecentEpisodes,
  searchEpisodesByEmbedding,
} from "./graphModels/episode";
import {
  findExactPredicateMatches,
  findSimilarEntities,
} from "./graphModels/entity";
import {
  findContradictoryStatements,
  findSimilarStatements,
  findStatementsWithSameSubjectObject,
  getTripleForStatement,
  invalidateStatements,
  saveTriple,
  searchStatementsByEmbedding,
} from "./graphModels/statement";
import { getEmbedding, makeModelCall } from "~/lib/model.server";
import { runQuery } from "~/lib/neo4j.server";
import { Apps, getNodeTypesString } from "~/utils/presets/nodes";
import { normalizePrompt, normalizeDocumentPrompt } from "./prompts";
import { type PrismaClient } from "@prisma/client";

// Default number of previous episodes to retrieve for context
const DEFAULT_EPISODE_WINDOW = 5;

export class KnowledgeGraphService {
  private clusteringService: ClusteringService;

  constructor() {
    this.clusteringService = new ClusteringService();
  }

  async getEmbedding(text: string) {
    return getEmbedding(text);
  }

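  // Illustrative usage sketch (not part of the original file): the service is
  // constructed without arguments and `getEmbedding` is a thin wrapper around
  // the shared model helper. Variable names below are hypothetical.
  //
  //   const knowledgeGraph = new KnowledgeGraphService();
  //   const vector = await knowledgeGraph.getEmbedding("San Francisco");
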
  /**
   * Invalidate statements from a previous document version that are no longer supported
   * by the new document content using semantic similarity analysis
   */
  async invalidateStatementsFromPreviousDocumentVersion(params: {
    previousDocumentUuid: string;
    newDocumentContent: string;
    userId: string;
    invalidatedBy: string;
    semanticSimilarityThreshold?: number;
  }): Promise<{
    invalidatedStatements: string[];
    preservedStatements: string[];
    totalStatementsAnalyzed: number;
  }> {
    const threshold = params.semanticSimilarityThreshold || 0.75; // Lower threshold for document-level analysis
    const invalidatedStatements: string[] = [];
    const preservedStatements: string[] = [];

    // Step 1: Get all statements from the previous document version
    const previousStatements = await this.getStatementsFromDocument(
      params.previousDocumentUuid,
      params.userId,
    );

    if (previousStatements.length === 0) {
      return {
        invalidatedStatements: [],
        preservedStatements: [],
        totalStatementsAnalyzed: 0,
      };
    }

    logger.log(
      `Analyzing ${previousStatements.length} statements from previous document version`,
    );

    // Step 2: Generate embedding for new document content
    const newDocumentEmbedding = await this.getEmbedding(
      params.newDocumentContent,
    );

    // Step 3: For each statement, check if it's still semantically supported by new content
    for (const statement of previousStatements) {
      try {
        // Generate embedding for the statement fact
        const statementEmbedding = await this.getEmbedding(statement.fact);

        // Calculate semantic similarity between statement and new document
        const semanticSimilarity = this.calculateCosineSimilarity(
          statementEmbedding,
          newDocumentEmbedding,
        );

        if (semanticSimilarity < threshold) {
          invalidatedStatements.push(statement.uuid);
          logger.log(
            `Invalidating statement: "${statement.fact}" (similarity: ${semanticSimilarity.toFixed(3)})`,
          );
        } else {
          preservedStatements.push(statement.uuid);
          logger.log(
            `Preserving statement: "${statement.fact}" (similarity: ${semanticSimilarity.toFixed(3)})`,
          );
        }
      } catch (error) {
        logger.error(`Error analyzing statement ${statement.uuid}:`, { error });
        // On error, be conservative and invalidate
        invalidatedStatements.push(statement.uuid);
      }
    }

    // Step 4: Bulk invalidate the selected statements
    if (invalidatedStatements.length > 0) {
      await invalidateStatements({
        statementIds: invalidatedStatements,
        invalidatedBy: params.invalidatedBy,
      });

      logger.log(`Document-level invalidation completed`, {
        previousDocumentUuid: params.previousDocumentUuid,
        totalAnalyzed: previousStatements.length,
        invalidated: invalidatedStatements.length,
        preserved: preservedStatements.length,
        threshold,
      });
    }

    return {
      invalidatedStatements,
      preservedStatements,
      totalStatementsAnalyzed: previousStatements.length,
    };
  }

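  // Illustrative call sketch (assumption: the caller knows the UUID of the
  // previous document version and has the re-ingested content; all variable
  // names below are hypothetical):
  //
  //   const report = await service.invalidateStatementsFromPreviousDocumentVersion({
  //     previousDocumentUuid: "doc-v1-uuid",
  //     newDocumentContent: updatedMarkdown,
  //     userId: "user-123",
  //     invalidatedBy: "doc-v2-uuid",
  //     semanticSimilarityThreshold: 0.8, // optional; defaults to 0.75
  //   });
  //   logger.log(
  //     `${report.invalidatedStatements.length} of ${report.totalStatementsAnalyzed} statements invalidated`,
  //   );
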
  /**
   * Get all statements that were created from episodes linked to a specific document
   */
  private async getStatementsFromDocument(
    documentUuid: string,
    userId: string,
  ): Promise<StatementNode[]> {
    const query = `
      MATCH (doc:Document {uuid: $documentUuid, userId: $userId})-[:CONTAINS_CHUNK]->(episode:Episode)
      MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)
      RETURN stmt
    `;

    const result = await runQuery(query, {
      documentUuid,
      userId,
    });

    return result.map((record) => {
      const stmt = record.get("stmt").properties;
      return {
        uuid: stmt.uuid,
        fact: stmt.fact,
        factEmbedding: stmt.factEmbedding || [],
        createdAt: new Date(stmt.createdAt),
        validAt: new Date(stmt.validAt),
        invalidAt: stmt.invalidAt ? new Date(stmt.invalidAt) : null,
        attributes: stmt.attributesJson ? JSON.parse(stmt.attributesJson) : {},
        userId: stmt.userId,
      };
    });
  }

  /**
   * Calculate cosine similarity between two embedding vectors
   */
  private calculateCosineSimilarity(vecA: number[], vecB: number[]): number {
    if (vecA.length !== vecB.length) {
      throw new Error("Vector dimensions must match");
    }

    let dotProduct = 0;
    let normA = 0;
    let normB = 0;

    for (let i = 0; i < vecA.length; i++) {
      dotProduct += vecA[i] * vecB[i];
      normA += vecA[i] * vecA[i];
      normB += vecB[i] * vecB[i];
    }

    normA = Math.sqrt(normA);
    normB = Math.sqrt(normB);

    if (normA === 0 || normB === 0) {
      return 0;
    }

    return dotProduct / (normA * normB);
  }

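  // Worked example for the similarity check above: for vecA = [1, 0, 0] and
  // vecB = [1, 1, 0], dotProduct = 1, |vecA| = 1, |vecB| = sqrt(2), so the
  // method returns 1 / sqrt(2) ≈ 0.707 — below the default 0.75 document
  // threshold, so a statement with that similarity would be invalidated.
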
  /**
   * Process an episode and update the knowledge graph.
   *
   * This method extracts information from the episode, creates nodes and statements,
   * and updates the graph database according to the reified + temporal approach.
   */
  async addEpisode(
    params: AddEpisodeParams,
    prisma: PrismaClient,
  ): Promise<{
    episodeUuid: string | null;
    statementsCreated: number;
    processingTimeMs: number;
  }> {
    const startTime = Date.now();
    const now = new Date();

    try {
      // Step 1: Context Retrieval - Get previous episodes for context
      const previousEpisodes = await getRecentEpisodes({
        referenceTime: params.referenceTime,
        limit: DEFAULT_EPISODE_WINDOW,
        userId: params.userId,
        source: params.source,
        sessionId: params.sessionId,
      });

      // Format session context from previous episodes
      const sessionContext =
        params.sessionId && previousEpisodes.length > 0
          ? previousEpisodes
              .map(
                (ep, i) =>
                  `Episode ${i + 1} (${ep.createdAt.toISOString()}): ${ep.content}`,
              )
              .join("\n\n")
          : undefined;

      const normalizedEpisodeBody = await this.normalizeEpisodeBody(
        params.episodeBody,
        params.source,
        params.userId,
        prisma,
        new Date(params.referenceTime),
        sessionContext,
        params.type,
      );

      const normalizedTime = Date.now();
      logger.log(`Normalized episode body in ${normalizedTime - startTime} ms`);

      if (normalizedEpisodeBody === "NOTHING_TO_REMEMBER") {
        logger.log("Nothing to remember");
        return {
          episodeUuid: null,
          statementsCreated: 0,
          processingTimeMs: 0,
        };
      }

      // Step 2: Episode Creation - Create the episode node
      const episode: EpisodicNode = {
        uuid: crypto.randomUUID(),
        content: normalizedEpisodeBody,
        originalContent: params.episodeBody,
        contentEmbedding: await this.getEmbedding(normalizedEpisodeBody),
        source: params.source,
        metadata: params.metadata || {},
        createdAt: now,
        validAt: new Date(params.referenceTime),
        labels: [],
        userId: params.userId,
        space: params.spaceId,
        sessionId: params.sessionId,
      };

      // Step 3: Entity Extraction - Extract entities from the episode content
      const extractedNodes = await this.extractEntities(
        episode,
        previousEpisodes,
      );

      console.log(extractedNodes.map((node) => node.name));

      const extractedTime = Date.now();
      logger.log(`Extracted entities in ${extractedTime - normalizedTime} ms`);

      // Step 3.1: Simple entity categorization (no type-based expansion needed)
      const categorizedEntities = {
        primary: extractedNodes,
        expanded: [], // No expansion needed with type-free approach
      };

      const expandedTime = Date.now();
      logger.log(`Processed entities in ${expandedTime - extractedTime} ms`);

      // Step 4: Statement Extraction - Extract statements (triples) instead of direct edges
      const extractedStatements = await this.extractStatements(
        episode,
        categorizedEntities,
        previousEpisodes,
      );

      const extractedStatementsTime = Date.now();
      logger.log(
        `Extracted statements in ${extractedStatementsTime - expandedTime} ms`,
      );

      // Step 5: Entity Resolution - Resolve extracted nodes to existing nodes or create new ones
      const resolvedTriples = await this.resolveExtractedNodes(
        extractedStatements,
        episode,
        previousEpisodes,
      );

      const resolvedTriplesTime = Date.now();
      logger.log(
        `Resolved Entities in ${resolvedTriplesTime - extractedStatementsTime} ms`,
      );

      // Step 6: Statement Resolution - Resolve statements and detect contradictions
      const { resolvedStatements, invalidatedStatements } =
        await this.resolveStatements(
          resolvedTriples,
          episode,
          previousEpisodes,
        );

      const resolvedStatementsTime = Date.now();
      logger.log(
        `Resolved statements in ${resolvedStatementsTime - resolvedTriplesTime} ms`,
      );

      // Step 7: Add attributes to entity nodes
      // const updatedTriples = await this.addAttributesToEntities(
      //   resolvedStatements,
      //   episode,
      // );

      const updatedTriples = resolvedStatements;

      const updatedTriplesTime = Date.now();
      logger.log(
        `Updated triples in ${updatedTriplesTime - resolvedStatementsTime} ms`,
      );

      // Build embedding-free copies of the triples (embeddings stripped so the
      // objects are safe to inspect/log); note: `safeTriple` is currently not
      // used after this loop.
      for (const triple of updatedTriples) {
        const { subject, predicate, object, statement, provenance } = triple;
        const safeTriple = {
          subject: {
            ...subject,
            nameEmbedding: undefined,
            typeEmbedding: undefined,
          },
          predicate: {
            ...predicate,
            nameEmbedding: undefined,
            typeEmbedding: undefined,
          },
          object: {
            ...object,
            nameEmbedding: undefined,
            typeEmbedding: undefined,
          },
          statement: { ...statement, factEmbedding: undefined },
          provenance: { ...provenance, contentEmbedding: undefined },
        };
      }

      // Save triples in parallel for better performance
      await Promise.all(updatedTriples.map((triple) => saveTriple(triple)));

      const saveTriplesTime = Date.now();
      logger.log(`Saved triples in ${saveTriplesTime - updatedTriplesTime} ms`);

      // Invalidate statements that were contradicted by the new episode
      await invalidateStatements({
        statementIds: invalidatedStatements,
        invalidatedBy: episode.uuid,
      });

      const endTime = Date.now();
      const processingTimeMs = endTime - startTime;
      logger.log(`Processing time: ${processingTimeMs} ms`);

      return {
        episodeUuid: episode.uuid,
        // nodesCreated: hydratedNodes.length,
        statementsCreated: resolvedStatements.length,
        processingTimeMs,
      };
    } catch (error) {
      console.error("Error in addEpisode:", error);
      throw error;
    }
  }

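  // Illustrative end-to-end usage sketch (assumptions: a configured Neo4j
  // connection and model provider behind the imported helpers, an initialized
  // PrismaClient, and `service` being a KnowledgeGraphService instance; the
  // exact shape of AddEpisodeParams lives in @core/types — the fields shown
  // are the ones this method reads):
  //
  //   const result = await service.addEpisode(
  //     {
  //       episodeBody: "Alice moved to Berlin in June 2024.",
  //       referenceTime: new Date().toISOString(),
  //       source: "slack",
  //       userId: "user-123",
  //       sessionId: "session-42",
  //       metadata: {},
  //     } as AddEpisodeParams,
  //     prisma,
  //   );
  //   // => { episodeUuid, statementsCreated, processingTimeMs }
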
  /**
   * Extract entities from an episode using LLM
   */
  private async extractEntities(
    episode: EpisodicNode,
    previousEpisodes: EpisodicNode[],
  ): Promise<EntityNode[]> {
    // Use the prompt library to get the appropriate prompts
    const context = {
      episodeContent: episode.content,
      previousEpisodes: previousEpisodes.map((ep) => ({
        content: ep.content,
        createdAt: ep.createdAt.toISOString(),
      })),
    };

    // Get the unified entity extraction prompt
    const extractionMode = episode.sessionId ? "conversation" : "document";
    const messages = extractEntities(context, extractionMode);

    let responseText = "";

    await makeModelCall(false, messages as CoreMessage[], (text) => {
      responseText = text;
    });

    // Convert to EntityNode objects
    let entities: EntityNode[] = [];

    const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);

    if (outputMatch && outputMatch[1]) {
      responseText = outputMatch[1].trim();
      const extractedEntities = JSON.parse(responseText || "{}").entities || [];

      // Batch generate embeddings for entity names
      const entityNames = extractedEntities.map((entity: any) => entity.name);
      const nameEmbeddings = await Promise.all(
        entityNames.map((name: string) => this.getEmbedding(name)),
      );

      entities = extractedEntities.map((entity: any, index: number) => ({
        uuid: crypto.randomUUID(),
        name: entity.name,
        type: undefined, // Type will be inferred from statements
        attributes: entity.attributes || {},
        nameEmbedding: nameEmbeddings[index],
        typeEmbedding: undefined, // No type embedding needed
        createdAt: new Date(),
        userId: episode.userId,
      }));
    }

    return entities;
  }

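  // For reference, the parser above expects the model to wrap JSON of roughly
  // this shape in <output> tags (illustrative values; only `name` and the
  // optional `attributes` object are read):
  //
  //   <output>
  //   { "entities": [{ "name": "Alice", "attributes": {} }, { "name": "Berlin" }] }
  //   </output>
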
  /**
   * Extract statements as first-class objects from an episode using LLM
   * This replaces the previous extractEdges method with a reified approach
   */
  private async extractStatements(
    episode: EpisodicNode,
    categorizedEntities: {
      primary: EntityNode[];
      expanded: EntityNode[];
    },
    previousEpisodes: EpisodicNode[],
  ): Promise<Triple[]> {
    // Use the prompt library to get the appropriate prompts
    const context = {
      episodeContent: episode.content,
      previousEpisodes: previousEpisodes.map((ep) => ({
        content: ep.content,
        createdAt: ep.createdAt.toISOString(),
      })),
      entities: {
        primary: categorizedEntities.primary.map((node) => ({
          name: node.name,
          type: node.type,
        })),
        expanded: categorizedEntities.expanded.map((node) => ({
          name: node.name,
          type: node.type,
        })),
      },
      referenceTime: episode.validAt.toISOString(),
    };

    // Get the statement extraction prompt from the prompt library
    const messages = extractStatements(context);

    let responseText = "";
    await makeModelCall(false, messages as CoreMessage[], (text) => {
      responseText = text;
    });

    const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
    if (outputMatch && outputMatch[1]) {
      responseText = outputMatch[1].trim();
    } else {
      responseText = "{}";
    }

    // Parse the statements from the LLM response
    const extractedTriples: ExtractedTripleData[] =
      JSON.parse(responseText || "{}").edges || [];

    console.log(`extracted triples length: ${extractedTriples.length}`);

    // Create maps to deduplicate entities by name within this extraction
    const predicateMap = new Map<string, EntityNode>();

    // First pass: collect all unique predicates from the current extraction
    for (const triple of extractedTriples) {
      const predicateName = triple.predicate.toLowerCase();
      if (!predicateMap.has(predicateName)) {
        // Create new predicate (embedding will be generated later in batch)
        const newPredicate = {
          uuid: crypto.randomUUID(),
          name: triple.predicate,
          type: "Predicate",
          attributes: {},
          nameEmbedding: null as any, // Will be filled later
          typeEmbedding: null as any, // Will be filled later
          createdAt: new Date(),
          userId: episode.userId,
        };
        predicateMap.set(predicateName, newPredicate);
      }
    }

    // Combine primary and expanded entities for entity matching
    const allEntities = [
      ...categorizedEntities.primary,
      ...categorizedEntities.expanded,
    ];

    // Batch generate embeddings for predicates and facts
    const uniquePredicates = Array.from(predicateMap.values());
    const factTexts = extractedTriples.map((t) => t.fact);
    const predicateNames = uniquePredicates.map((p) => p.name);

    const [predicateNameEmbeddings, predicateTypeEmbeddings, factEmbeddings] =
      await Promise.all([
        Promise.all(predicateNames.map((name) => this.getEmbedding(name))),
        Promise.all(predicateNames.map(() => this.getEmbedding("Predicate"))),
        Promise.all(factTexts.map((fact) => this.getEmbedding(fact))),
      ]);

    // Update predicate embeddings
    uniquePredicates.forEach((predicate, index) => {
      predicate.nameEmbedding = predicateNameEmbeddings[index];
      predicate.typeEmbedding = predicateTypeEmbeddings[index];
    });

    // Convert extracted triples to Triple objects with Statement nodes
    const triples = extractedTriples.map(
      (triple: ExtractedTripleData, tripleIndex: number) => {
        // Find the subject and object nodes by matching name (type-free approach)
        const subjectNode = allEntities.find(
          (node) => node.name.toLowerCase() === triple.source.toLowerCase(),
        );

        const objectNode = allEntities.find(
          (node) => node.name.toLowerCase() === triple.target.toLowerCase(),
        );

        // Get the deduplicated predicate node
        const predicateNode = predicateMap.get(triple.predicate.toLowerCase());

        if (subjectNode && objectNode && predicateNode) {
          // Determine the correct validAt date (when the fact actually occurred/occurs)
          let validAtDate = episode.validAt; // Default fallback to episode date

          // Check if statement has event_date indicating when the fact actually happened/happens
          if (triple.attributes?.event_date) {
            try {
              const eventDate = new Date(triple.attributes.event_date);
              // Use the event date as validAt (when the fact is actually true)
              if (!isNaN(eventDate.getTime())) {
                validAtDate = eventDate;
              }
            } catch (error) {
              // If parsing fails, use episode validAt as fallback
              logger.log(
                `Failed to parse event_date: ${triple.attributes.event_date}, using episode validAt`,
              );
            }
          }

          // Create a statement node
          const statement: StatementNode = {
            uuid: crypto.randomUUID(),
            fact: triple.fact,
            factEmbedding: factEmbeddings[tripleIndex],
            createdAt: new Date(),
            validAt: validAtDate,
            invalidAt: null,
            attributes: triple.attributes || {},
            userId: episode.userId,
          };

          return {
            statement,
            subject: subjectNode,
            predicate: predicateNode,
            object: objectNode,
            provenance: episode,
          };
        }
        return null;
      },
    );

    // Filter out null values (where subject or object wasn't found)
    return triples.filter(Boolean) as Triple[];
  }

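  // For reference, the parser above reads an `edges` array wrapped in <output>
  // tags; the fields consumed are source, predicate, target, fact, and the
  // optional attributes.event_date (illustrative values):
  //
  //   <output>
  //   { "edges": [{ "source": "Alice", "predicate": "moved_to", "target": "Berlin",
  //                 "fact": "Alice moved to Berlin in June 2024",
  //                 "attributes": { "event_date": "2024-06-01" } }] }
  //   </output>
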
  /**
   * Resolve extracted nodes to existing nodes or create new ones
   */
  private async resolveExtractedNodes(
    triples: Triple[],
    episode: EpisodicNode,
    previousEpisodes: EpisodicNode[],
  ): Promise<Triple[]> {
    // Step 1: Extract unique entities from triples
    const uniqueEntitiesMap = new Map<string, EntityNode>();
    const entityIdToPositions = new Map<
      string,
      Array<{
        tripleIndex: number;
        position: "subject" | "predicate" | "object";
      }>
    >();

    // First pass: collect all unique entities and their positions in triples
    triples.forEach((triple, tripleIndex) => {
      // Process subject
      if (!uniqueEntitiesMap.has(triple.subject.uuid)) {
        uniqueEntitiesMap.set(triple.subject.uuid, triple.subject);
      }
      if (!entityIdToPositions.has(triple.subject.uuid)) {
        entityIdToPositions.set(triple.subject.uuid, []);
      }
      entityIdToPositions.get(triple.subject.uuid)!.push({
        tripleIndex,
        position: "subject",
      });

      // Process predicate
      if (!uniqueEntitiesMap.has(triple.predicate.uuid)) {
        uniqueEntitiesMap.set(triple.predicate.uuid, triple.predicate);
      }
      if (!entityIdToPositions.has(triple.predicate.uuid)) {
        entityIdToPositions.set(triple.predicate.uuid, []);
      }
      entityIdToPositions.get(triple.predicate.uuid)!.push({
        tripleIndex,
        position: "predicate",
      });

      // Process object
      if (!uniqueEntitiesMap.has(triple.object.uuid)) {
        uniqueEntitiesMap.set(triple.object.uuid, triple.object);
      }
      if (!entityIdToPositions.has(triple.object.uuid)) {
        entityIdToPositions.set(triple.object.uuid, []);
      }
      entityIdToPositions.get(triple.object.uuid)!.push({
        tripleIndex,
        position: "object",
      });
    });

    // Convert to arrays for processing
    const uniqueEntities = Array.from(uniqueEntitiesMap.values());

    // Separate predicates from other entities
    const predicates = uniqueEntities.filter(
      (entity) => entity.type === "Predicate",
    );
    const nonPredicates = uniqueEntities.filter(
      (entity) => entity.type !== "Predicate",
    );

    // Step 2a: Find similar entities for non-predicate entities
    const similarEntitiesResults = await Promise.all(
      nonPredicates.map(async (entity) => {
        const similarEntities = await findSimilarEntities({
          queryEmbedding: entity.nameEmbedding,
          limit: 5,
          threshold: 0.7,
          userId: episode.userId,
        });
        return {
          entity,
          similarEntities,
        };
      }),
    );

    // Step 2b: Find exact matches for predicates
    const exactPredicateResults = await Promise.all(
      predicates.map(async (predicate) => {
        const exactMatches = await findExactPredicateMatches({
          predicateName: predicate.name,
          userId: episode.userId,
        });

        // Filter out the current predicate from matches
        const filteredMatches = exactMatches.filter(
          (match) => match.uuid !== predicate.uuid,
        );

        return {
          entity: predicate,
          similarEntities: filteredMatches, // Use the same structure as similarEntitiesResults
        };
      }),
    );

    // Combine the results
    const allEntityResults = [
      ...similarEntitiesResults,
      ...exactPredicateResults,
    ];

    // Step 3: Prepare context for LLM deduplication
    const dedupeContext = {
      extracted_nodes: allEntityResults.map((result, index) => ({
        id: index,
        name: result.entity.name,
        entity_type: result.entity.type,
        duplication_candidates: result.similarEntities.map((candidate, j) => ({
          idx: j,
          name: candidate.name,
          entity_type: candidate.type,
        })),
      })),
      episode_content: episode ? episode.content : "",
      previous_episodes: previousEpisodes
        ? previousEpisodes.map((ep) => ep.content)
        : [],
    };

    // Step 4: Call LLM to resolve duplicates
    const messages = dedupeNodes(dedupeContext);
    let responseText = "";

    await makeModelCall(false, messages as CoreMessage[], (text) => {
      responseText = text;
    });

    // Step 5: Process LLM response
    const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
    if (!outputMatch || !outputMatch[1]) {
      return triples; // Return original if parsing fails
    }

    try {
      responseText = outputMatch[1].trim();
      const parsedResponse = JSON.parse(responseText);
      const nodeResolutions = parsedResponse.entity_resolutions || [];

      // Step 6: Create mapping from original entity UUID to resolved entity
      const entityResolutionMap = new Map<string, EntityNode>();

      nodeResolutions.forEach((resolution: any, index: number) => {
        const originalEntity = allEntityResults[resolution.id ?? index];
        if (!originalEntity) return;

        const duplicateIdx = resolution.duplicate_idx ?? -1;

        // Get the corresponding result from allEntityResults
        const resultEntry = allEntityResults.find(
          (result) => result.entity.uuid === originalEntity.entity.uuid,
        );

        if (!resultEntry) return;

        // If a duplicate was found, use that entity, otherwise keep original
        const resolvedEntity =
          duplicateIdx >= 0 && duplicateIdx < resultEntry.similarEntities.length
            ? resultEntry.similarEntities[duplicateIdx]
            : originalEntity.entity;

        // Update name if provided
        if (resolution.name) {
          resolvedEntity.name = resolution.name;
        }

        // Map original UUID to resolved entity
        entityResolutionMap.set(originalEntity.entity.uuid, resolvedEntity);
      });

      // Step 7: Reconstruct triples with resolved entities
      const resolvedTriples = triples.map((triple) => {
        const newTriple = { ...triple };

        // Replace subject if resolved
        if (entityResolutionMap.has(triple.subject.uuid)) {
          newTriple.subject = entityResolutionMap.get(triple.subject.uuid)!;
        }

        // Replace predicate if resolved
        if (entityResolutionMap.has(triple.predicate.uuid)) {
          newTriple.predicate = entityResolutionMap.get(triple.predicate.uuid)!;
        }

        // Replace object if resolved
        if (entityResolutionMap.has(triple.object.uuid)) {
          newTriple.object = entityResolutionMap.get(triple.object.uuid)!;
        }

        return newTriple;
      });

      return resolvedTriples;
    } catch (error) {
      console.error("Error processing entity resolutions:", error);
      return triples; // Return original triples on error
    }
  }

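  // For reference, the deduplication parser above reads `entity_resolutions`
  // entries with an `id` (index into extracted_nodes), a `duplicate_idx`
  // (index into that node's duplication_candidates, or -1 for "no duplicate"),
  // and an optional canonical `name` (illustrative values):
  //
  //   <output>
  //   { "entity_resolutions": [{ "id": 0, "duplicate_idx": 2, "name": "Alice Smith" }] }
  //   </output>
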
  /**
   * Resolve statements by checking for existing statements and handling contradictions
   * This replaces the previous resolveExtractedEdges method with a reified approach
   */
  private async resolveStatements(
    triples: Triple[],
    episode: EpisodicNode,
    previousEpisodes: EpisodicNode[],
  ): Promise<{
    resolvedStatements: Triple[];
    invalidatedStatements: string[];
  }> {
    const resolvedStatements: Triple[] = [];
    const invalidatedStatements: string[] = [];

    if (triples.length === 0) {
      return { resolvedStatements, invalidatedStatements };
    }

    // Step 1: Collect all potential matches for all triples at once
    const allPotentialMatches: Map<string, StatementNode[]> = new Map();
    const allExistingTripleData: Map<string, Triple> = new Map();

    // For preparing the LLM context
    const newStatements: any[] = [];
    const similarStatements: any[] = [];

    for (const triple of triples) {
      // Track IDs of statements we've already checked to avoid duplicates
      const checkedStatementIds: string[] = [];
      let potentialMatches: StatementNode[] = [];

      // Phase 1a: Find statements with exact subject-predicate match
      // Example: "John lives_in New York" vs "John lives_in San Francisco"
      const exactMatches = await findContradictoryStatements({
        subjectId: triple.subject.uuid,
        predicateId: triple.predicate.uuid,
        userId: triple.provenance.userId,
      });

      if (exactMatches && exactMatches.length > 0) {
        potentialMatches.push(...exactMatches);
        checkedStatementIds.push(...exactMatches.map((s) => s.uuid));
      }

      // Phase 1b: Find statements with same subject-object but different predicates
      // Example: "John is_married_to Sarah" vs "John is_divorced_from Sarah"
      const subjectObjectMatches = await findStatementsWithSameSubjectObject({
        subjectId: triple.subject.uuid,
        objectId: triple.object.uuid,
        excludePredicateId: triple.predicate.uuid,
        userId: triple.provenance.userId,
      });

      if (subjectObjectMatches && subjectObjectMatches.length > 0) {
        // Filter out statements we've already checked
        const newSubjectObjectMatches = subjectObjectMatches.filter(
          (match) => !checkedStatementIds.includes(match.uuid),
        );
        if (newSubjectObjectMatches.length > 0) {
          potentialMatches.push(...newSubjectObjectMatches);
          checkedStatementIds.push(
            ...newSubjectObjectMatches.map((s) => s.uuid),
          );
        }
      }

      // Phase 2: Find semantically similar statements
      const semanticMatches = await findSimilarStatements({
        factEmbedding: triple.statement.factEmbedding,
        threshold: 0.85,
        excludeIds: checkedStatementIds,
        userId: triple.provenance.userId,
      });

      if (semanticMatches && semanticMatches.length > 0) {
        potentialMatches.push(...semanticMatches);
      }

      // Phase 3: Check related memories for contradictory statements
      const previousEpisodesStatements: StatementNode[] = [];

      await Promise.all(
        previousEpisodes.map(async (episode) => {
          const statements = await getEpisodeStatements({
            episodeUuid: episode.uuid,
            userId: episode.userId,
          });
          previousEpisodesStatements.push(...statements);
        }),
      );

      if (previousEpisodesStatements && previousEpisodesStatements.length > 0) {
        // Filter out facts we've already checked
        const newRelatedFacts = previousEpisodesStatements
          .flat()
          .filter((fact) => !checkedStatementIds.includes(fact.uuid));

        if (newRelatedFacts.length > 0) {
          potentialMatches.push(...newRelatedFacts);
        }
      }

      if (potentialMatches.length > 0) {
        logger.info(
          `Found ${potentialMatches.length} potential matches for: ${triple.statement.fact}`,
        );

        allPotentialMatches.set(triple.statement.uuid, potentialMatches);

        // Get full triple information for each potential match
        for (const match of potentialMatches) {
          if (!allExistingTripleData.has(match.uuid)) {
            const existingTripleData = await getTripleForStatement({
              statementId: match.uuid,
            });

            if (existingTripleData) {
              allExistingTripleData.set(match.uuid, existingTripleData);

              // Add to similarStatements for LLM context
              similarStatements.push({
                statementId: match.uuid,
                fact: existingTripleData.statement.fact,
                subject: existingTripleData.subject.name,
                predicate: existingTripleData.predicate.name,
                object: existingTripleData.object.name,
              });
            }
          }
        }
      }

      // Add to newStatements for LLM context
      newStatements.push({
        statement: {
          uuid: triple.statement.uuid,
          fact: triple.statement.fact,
        },
        subject: triple.subject.name,
        predicate: triple.predicate.name,
        object: triple.object.name,
      });
    }

    // Step 2: If we have potential matches, use the LLM to analyze them in batch
    if (similarStatements.length > 0) {
      // Prepare context for the LLM
      const promptContext = {
        newStatements,
        similarStatements,
        episodeContent: episode.content,
        referenceTime: episode.validAt.toISOString(),
      };

      // Get the statement resolution prompt
      const messages = resolveStatementPrompt(promptContext);

      let responseText = "";

      // Call the LLM to analyze all statements at once
      await makeModelCall(false, messages, (text) => {
        responseText = text;
      });

      try {
        // Extract the JSON response from the output tags
        const jsonMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
        const analysisResult = jsonMatch ? JSON.parse(jsonMatch[1]) : [];

        // Process the analysis results
        for (const result of analysisResult) {
          const tripleIndex = triples.findIndex(
            (t) => t.statement.uuid === result.statementId,
          );
          if (tripleIndex === -1) continue;

          const triple = triples[tripleIndex];

          // Handle duplicates
          if (result.isDuplicate && result.duplicateId) {
            const duplicateTriple = allExistingTripleData.get(
              result.duplicateId,
            );
            if (duplicateTriple) {
              logger.info(`Statement is a duplicate: ${triple.statement.fact}`);
              resolvedStatements.push(duplicateTriple);
              continue;
            }
          }

          // Handle contradictions
          if (result.contradictions && result.contradictions.length > 0) {
            for (const contradictionId of result.contradictions) {
              const contradictedTriple =
                allExistingTripleData.get(contradictionId);
              if (contradictedTriple) {
                invalidatedStatements.push(contradictedTriple.statement.uuid);
              }
            }
          }

          // Add the new statement if it's not a duplicate
          if (!result.isDuplicate) {
            logger.info(`Adding new statement: ${triple.statement.fact}`);
            resolvedStatements.push(triple);
          }
        }
      } catch (e) {
        logger.error("Error processing batch analysis:", { error: e });

        // Fallback: add all statements as new if we couldn't process the analysis
        for (const triple of triples) {
          if (
            !resolvedStatements.some(
              (s) => s.statement.uuid === triple.statement.uuid,
            )
          ) {
            logger.info(
              `Fallback: Adding statement as new: ${triple.statement.fact}`,
            );
            resolvedStatements.push(triple);
          }
        }
      }
    } else {
      // No potential matches found for any statements, add them all as new
      for (const triple of triples) {
        logger.info(
          `No matches found, adding as new: ${triple.statement.fact}`,
        );
        resolvedStatements.push(triple);
      }
    }

    return { resolvedStatements, invalidatedStatements };
  }

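  // For reference, the batch analysis parser above expects an array wrapped in
  // <output> tags, one entry per new statement, with statementId, isDuplicate,
  // an optional duplicateId, and an optional contradictions list of existing
  // statement UUIDs (illustrative values):
  //
  //   <output>
  //   [{ "statementId": "new-stmt-uuid", "isDuplicate": false,
  //      "contradictions": ["old-stmt-uuid"] }]
  //   </output>
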
  /**
   * Add attributes to entity nodes based on the resolved statements
   */
  private async addAttributesToEntities(
    triples: Triple[],
    episode: EpisodicNode,
  ): Promise<Triple[]> {
    // Collect all unique entities from the triples
    const entityMap = new Map<string, EntityNode>();

    // Add all subjects, predicates, and objects to the map
    triples.forEach((triple) => {
      if (triple.subject) {
        entityMap.set(triple.subject.uuid, triple.subject);
      }
      if (triple.predicate) {
        entityMap.set(triple.predicate.uuid, triple.predicate);
      }
      if (triple.object) {
        entityMap.set(triple.object.uuid, triple.object);
      }
    });

    // Convert the map to an array of entities
    const entities = Array.from(entityMap.values());

    if (entities.length === 0) {
      return triples; // No entities to process
    }

    // Prepare simplified context for the LLM
    const context = {
      episodeContent: episode.content,
      entities: entities.map((entity) => ({
        uuid: entity.uuid,
        name: entity.name,
        currentAttributes: entity.attributes || {},
      })),
    };

    // Create a prompt for the LLM to extract attributes
    const messages = extractAttributes(context);

    let responseText = "";

    // Call the LLM to extract attributes
    await makeModelCall(false, messages as CoreMessage[], (text) => {
      responseText = text;
    });

    try {
      const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
      if (outputMatch && outputMatch[1]) {
        responseText = outputMatch[1].trim();
      }
      // Parse the LLM response
      const responseData = JSON.parse(responseText);
      const updatedEntities = responseData.entities || [];

      // Update entity attributes and save them
      for (const updatedEntity of updatedEntities) {
        const entity = entityMap.get(updatedEntity.uuid);
        if (entity) {
          // Replace the existing attributes with the extracted ones
          entity.attributes = {
            ...updatedEntity.attributes,
          };
        }
      }

      logger.info(`Updated attributes for ${updatedEntities.length} entities`);
    } catch (error) {
      logger.error("Error processing entity attributes", { error });
    }

    return triples;
  }

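  // For reference, the attribute-extraction parser above reads an `entities`
  // array whose entries carry the entity uuid and the attributes object that
  // overwrites entity.attributes (illustrative values):
  //
  //   <output>
  //   { "entities": [{ "uuid": "entity-uuid", "attributes": { "city": "Berlin" } }] }
  //   </output>
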
  /**
   * Normalize the raw episode body with the LLM normalization prompts,
   * using related memories and source ingestion rules as context
   */
  private async normalizeEpisodeBody(
    episodeBody: string,
    source: string,
    userId: string,
    prisma: PrismaClient,
    episodeTimestamp?: Date,
    sessionContext?: string,
    contentType?: EpisodeType,
  ) {
    let appEnumValues: Apps[] = [];
    if (Apps[source.toUpperCase() as keyof typeof Apps]) {
      appEnumValues = [Apps[source.toUpperCase() as keyof typeof Apps]];
    }
    const entityTypes = getNodeTypesString(appEnumValues);
    const relatedMemories = await this.getRelatedMemories(episodeBody, userId);

    // Fetch ingestion rules for this source
    const ingestionRules = await this.getIngestionRulesForSource(
      source,
      userId,
      prisma,
    );

    const context = {
      episodeContent: episodeBody,
      entityTypes: entityTypes,
      source,
      relatedMemories,
      ingestionRules,
      episodeTimestamp:
        episodeTimestamp?.toISOString() || new Date().toISOString(),
      sessionContext,
    };

    // Route to the appropriate normalization prompt based on content type
    const messages =
      contentType === EpisodeTypeEnum.DOCUMENT
        ? normalizeDocumentPrompt(context)
        : normalizePrompt(context);
    let responseText = "";
    await makeModelCall(false, messages, (text) => {
      responseText = text;
    });
    let normalizedEpisodeBody = "";
    const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
    if (outputMatch && outputMatch[1]) {
      normalizedEpisodeBody = outputMatch[1].trim();
    } else {
      // Log format violation and use fallback
      logger.warn("Normalization response missing <output> tags", {
        responseText: responseText.substring(0, 200) + "...",
        source,
        episodeLength: episodeBody.length,
      });

      // Fallback: use raw response if it's not empty and seems meaningful
      const trimmedResponse = responseText.trim();
      if (
        trimmedResponse &&
        trimmedResponse !== "NOTHING_TO_REMEMBER" &&
        trimmedResponse.length > 10
      ) {
        normalizedEpisodeBody = trimmedResponse;
        logger.info("Using raw response as fallback for normalization", {
          fallbackLength: trimmedResponse.length,
        });
      } else {
        logger.warn("No usable normalization content found", {
          responseText: responseText,
        });
      }
    }

    return normalizedEpisodeBody;
  }

  /**
   * Retrieves related episodes and facts based on semantic similarity to the current episode content.
   *
   * @param episodeContent The content of the current episode
   * @param userId The user ID
   * @param options Optional limits (episodeLimit, factLimit) and minimum similarity for the search
   * @returns A string containing formatted related episodes and facts
   */
  private async getRelatedMemories(
    episodeContent: string,
    userId: string,
    options: {
      episodeLimit?: number;
      factLimit?: number;
      minSimilarity?: number;
    } = {},
  ): Promise<string> {
    try {
      // Default configuration values
      const episodeLimit = options.episodeLimit ?? 5;
      const factLimit = options.factLimit ?? 10;
      const minSimilarity = options.minSimilarity ?? 0.75;

      // Get embedding for the current episode content
      const contentEmbedding = await this.getEmbedding(episodeContent);

      // Retrieve semantically similar episodes (excluding very recent ones that are already in context)
      const relatedEpisodes = await searchEpisodesByEmbedding({
        embedding: contentEmbedding,
        userId,
        limit: episodeLimit,
        minSimilarity,
      });

      // Retrieve semantically similar facts/statements
      const relatedFacts = await searchStatementsByEmbedding({
        embedding: contentEmbedding,
        userId,
        limit: factLimit,
        minSimilarity,
      });

      // Format the related memories for inclusion in the prompt
      let formattedMemories = "";

      if (relatedEpisodes.length > 0) {
        formattedMemories += "## Related Episodes\n";
        relatedEpisodes.forEach((episode, index) => {
          formattedMemories += `### Episode ${index + 1} (${new Date(episode.validAt).toISOString()})\n`;
          formattedMemories += `${episode.content}\n\n`;
        });
      }

      if (relatedFacts.length > 0) {
        formattedMemories += "## Related Facts\n";
        relatedFacts.forEach((fact) => {
          formattedMemories += `- ${fact.fact}\n`;
        });
      }

      return formattedMemories.trim();
    } catch (error) {
      console.error("Error retrieving related memories:", error);
      return "";
    }
  }

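  // Example of the string this method produces (derived from the formatting
  // code above; contents are illustrative):
  //
  //   ## Related Episodes
  //   ### Episode 1 (2024-06-01T00:00:00.000Z)
  //   Alice moved to Berlin in June 2024.
  //
  //   ## Related Facts
  //   - Alice lives in Berlin
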
  /**
   * Retrieves active ingestion rules for a specific source and user
   */
  private async getIngestionRulesForSource(
    source: string,
    userId: string,
    prisma: PrismaClient,
  ): Promise<string | null> {
    try {
      // Get the user's workspace
      const user = await prisma.user.findUnique({
        where: { id: userId },
        include: { Workspace: true },
      });

      if (!user?.Workspace) {
        return null;
      }

      const integrationAccount = await prisma.integrationAccount.findFirst({
        where: {
          integrationDefinition: {
            slug: source,
          },
          workspaceId: user.Workspace.id,
          isActive: true,
          deleted: null,
        },
      });

      if (!integrationAccount) {
        return null;
      }

      // Fetch active rules for this source
      const rules = await prisma.ingestionRule.findMany({
        where: {
          source: integrationAccount.id,
          workspaceId: user.Workspace.id,
          isActive: true,
          deleted: null,
        },
        select: {
          text: true,
          name: true,
        },
        orderBy: { createdAt: "asc" },
      });

      if (rules.length === 0) {
        return null;
      }

      // Format rules for the prompt
      const formattedRules = rules
        .map((rule, index) => {
          const ruleName = rule.name ? `${rule.name}: ` : `Rule ${index + 1}: `;
          return `${ruleName}${rule.text}`;
        })
        .join("\n");

      return formattedRules;
    } catch (error) {
      console.error("Error retrieving ingestion rules:", error);
      return null;
    }
  }
}