fix: remove attribute logic

Harshith Mullapudi 2025-10-20 12:18:14 +05:30
parent 95636f96a8
commit 1db2628af4
2 changed files with 128 additions and 213 deletions
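
Most of the diff below is mechanical reformatting: multi-argument makeModelCall call sites are broken onto one argument per line, and inline tokenMetrics type annotations are wrapped. The substantive change is the removal of the attribute-extraction pass: the addAttributesToEntities method in the service file and the extractAttributes prompt in the prompts module (evidently ./prompts/nodes, judging by the import at the top of the first file). As a reading aid, here is a sketch of the makeModelCall signature implied by these call sites; the parameter names are assumptions, only the arity and argument shapes come from the code.

import { type CoreMessage } from "ai";

// Inferred shape only; names are hypothetical. The real declaration
// lives in ~/lib/model.server.
interface TokenUsage {
  promptTokens: number;
  completionTokens: number;
  totalTokens: number;
}

declare function makeModelCall(
  stream: boolean, // always false at these call sites
  messages: CoreMessage[], // prompt built by the prompt library
  onText: (text: string, model: unknown, usage?: TokenUsage) => void,
  schema?: unknown, // always undefined here
  complexity?: "high" | "low", // routes the call to a stronger or cheaper model
): Promise<void>;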

View File

@@ -12,11 +12,7 @@ import {
import { logger } from "./logger.service";
import { ClusteringService } from "./clustering.server";
import crypto from "crypto";
import {
dedupeNodes,
extractAttributes,
extractEntities,
} from "./prompts/nodes";
import { dedupeNodes, extractEntities } from "./prompts/nodes";
import {
extractStatements,
extractStatementsOSS,
@@ -40,7 +36,11 @@ import {
saveTriple,
searchStatementsByEmbedding,
} from "./graphModels/statement";
import { getEmbedding, makeModelCall, isProprietaryModel } from "~/lib/model.server";
import {
getEmbedding,
makeModelCall,
isProprietaryModel,
} from "~/lib/model.server";
import { runQuery } from "~/lib/neo4j.server";
import { Apps, getNodeTypesString } from "~/utils/presets/nodes";
import { normalizePrompt, normalizeDocumentPrompt } from "./prompts";
@@ -419,8 +419,8 @@ export class KnowledgeGraphService {
logger.log(`Processing time: ${processingTimeMs} ms`);
// Count only truly new statements (exclude duplicates)
const newStatementsCount = updatedTriples.filter(triple =>
triple.statement.createdAt >= episode.createdAt
const newStatementsCount = updatedTriples.filter(
(triple) => triple.statement.createdAt >= episode.createdAt,
).length;
return {
@@ -442,7 +442,10 @@ export class KnowledgeGraphService {
private async extractEntities(
episode: EpisodicNode,
previousEpisodes: EpisodicNode[],
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
tokenMetrics: {
high: { input: number; output: number; total: number };
low: { input: number; output: number; total: number };
},
): Promise<EntityNode[]> {
// Use the prompt library to get the appropriate prompts
const context = {
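
Each private step now threads the same tokenMetrics accumulator through its signature. A caller would initialize it with zeroed counters, along these lines (a sketch; only the field shape is taken from the signature above):

// Hypothetical initializer matching the tokenMetrics parameter shape.
const tokenMetrics = {
  high: { input: 0, output: 0, total: 0 }, // HIGH-complexity calls: entity and statement extraction
  low: { input: 0, output: 0, total: 0 }, // LOW-complexity calls: dedupe, resolution, normalization
};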
@@ -460,14 +463,20 @@ export class KnowledgeGraphService {
let responseText = "";
// Entity extraction requires HIGH complexity (creative reasoning, nuanced NER)
await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.high.input += usage.promptTokens;
tokenMetrics.high.output += usage.completionTokens;
tokenMetrics.high.total += usage.totalTokens;
}
}, undefined, 'high');
await makeModelCall(
false,
messages as CoreMessage[],
(text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.high.input += usage.promptTokens;
tokenMetrics.high.output += usage.completionTokens;
tokenMetrics.high.total += usage.totalTokens;
}
},
undefined,
"high",
);
// Convert to EntityNode objects
let entities: EntityNode[] = [];
@@ -478,19 +487,23 @@ export class KnowledgeGraphService {
responseText = outputMatch[1].trim();
const parsedResponse = JSON.parse(responseText || "[]");
// Handle both old format {entities: [...]} and new format [...]
const extractedEntities = Array.isArray(parsedResponse) ? parsedResponse : (parsedResponse.entities || []);
const extractedEntities = Array.isArray(parsedResponse)
? parsedResponse
: parsedResponse.entities || [];
// Batch generate embeddings for entity names
const entityNames = Array.isArray(extractedEntities[0]) ? extractedEntities : extractedEntities.map((entity: any) => entity.name || entity);
const entityNames = Array.isArray(extractedEntities[0])
? extractedEntities
: extractedEntities.map((entity: any) => entity.name || entity);
const nameEmbeddings = await Promise.all(
entityNames.map((name: string) => this.getEmbedding(name)),
);
entities = extractedEntities.map((entity: any, index: number) => ({
uuid: crypto.randomUUID(),
name: typeof entity === 'string' ? entity : entity.name,
name: typeof entity === "string" ? entity : entity.name,
type: undefined, // Type will be inferred from statements
attributes: typeof entity === 'string' ? {} : (entity.attributes || {}),
attributes: typeof entity === "string" ? {} : entity.attributes || {},
nameEmbedding: nameEmbeddings[index],
typeEmbedding: undefined, // No type embedding needed
createdAt: new Date(),
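
The branching above accepts three historical response shapes: the old { entities: [...] } wrapper, a bare array of entity objects, and a bare array of name strings. A standalone illustration with invented sample data:

// Illustrative only; the sample payloads are invented.
const samples: any[] = [
  { entities: [{ name: "Ada", attributes: { role: "engineer" } }] }, // old wrapper format
  [{ name: "Ada", attributes: { role: "engineer" } }], // new bare-array format
  ["Ada"], // bare names; attributes default to {}
];

for (const parsed of samples) {
  const extracted = Array.isArray(parsed) ? parsed : (parsed.entities ?? []);
  const names = extracted.map((e: any) => (typeof e === "string" ? e : e.name));
  console.log(names); // ["Ada"] in all three cases
}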
@@ -512,7 +525,10 @@ export class KnowledgeGraphService {
expanded: EntityNode[];
},
previousEpisodes: EpisodicNode[],
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
tokenMetrics: {
high: { input: number; output: number; total: number };
low: { input: number; output: number; total: number };
},
): Promise<Triple[]> {
// Use the prompt library to get the appropriate prompts
const context = {
@@ -534,22 +550,28 @@ export class KnowledgeGraphService {
referenceTime: episode.validAt.toISOString(),
};
console.log("proprietary model", isProprietaryModel(undefined, 'high'));
console.log("proprietary model", isProprietaryModel(undefined, "high"));
// Statement extraction requires HIGH complexity (causal reasoning, emotional context)
// Choose between proprietary and OSS prompts based on model type
const messages = isProprietaryModel(undefined, 'high')
const messages = isProprietaryModel(undefined, "high")
? extractStatements(context)
: extractStatementsOSS(context);
let responseText = "";
await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.high.input += usage.promptTokens;
tokenMetrics.high.output += usage.completionTokens;
tokenMetrics.high.total += usage.totalTokens;
}
}, undefined, 'high');
await makeModelCall(
false,
messages as CoreMessage[],
(text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.high.input += usage.promptTokens;
tokenMetrics.high.output += usage.completionTokens;
tokenMetrics.high.total += usage.totalTokens;
}
},
undefined,
"high",
);
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
if (outputMatch && outputMatch[1]) {
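
isProprietaryModel(undefined, "high") decides whether the proprietary or the OSS statement-extraction prompt is used; its implementation is not shown in this diff. A plausible sketch, offered purely as an assumption about what such a helper might look like:

// Hypothetical sketch; the real helper is imported from ~/lib/model.server.
const PROPRIETARY_PREFIXES = ["gpt-", "claude-", "gemini-"]; // assumed list

function isProprietaryModelSketch(
  model?: string,
  complexity: "high" | "low" = "low",
): boolean {
  // Assumption: with no explicit model, fall back to an env-configured model per tier.
  const resolved =
    model ??
    (complexity === "high" ? process.env.MODEL_HIGH : process.env.MODEL_LOW) ??
    "";
  return PROPRIETARY_PREFIXES.some((prefix) => resolved.startsWith(prefix));
}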
@@ -561,9 +583,11 @@ export class KnowledgeGraphService {
// Parse the statements from the LLM response
const parsedResponse = JSON.parse(responseText || "[]");
// Handle both old format {"edges": [...]} and new format [...]
const extractedTriples: ExtractedTripleData[] = Array.isArray(parsedResponse)
const extractedTriples: ExtractedTripleData[] = Array.isArray(
parsedResponse,
)
? parsedResponse
: (parsedResponse.edges || []);
: parsedResponse.edges || [];
console.log(`extracted triples length: ${extractedTriples.length}`);
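
The <output> tag regex above recurs at every call site in this file. A self-contained demonstration on a made-up model response:

// The response text here is invented for illustration.
const sampleResponse = `Some preamble the parser should skip...
<output>[{"source": "Ada", "predicate": "works_at", "target": "Initech"}]</output>`;

const match = sampleResponse.match(/<output>([\s\S]*?)<\/output>/);
const parsedTriples = JSON.parse(match?.[1]?.trim() ?? "[]");
console.log(parsedTriples.length); // 1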
@@ -683,7 +707,10 @@ export class KnowledgeGraphService {
triples: Triple[],
episode: EpisodicNode,
previousEpisodes: EpisodicNode[],
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
tokenMetrics: {
high: { input: number; output: number; total: number };
low: { input: number; output: number; total: number };
},
): Promise<Triple[]> {
// Step 1: Extract unique entities from triples
const uniqueEntitiesMap = new Map<string, EntityNode>();
@@ -810,14 +837,20 @@ export class KnowledgeGraphService {
let responseText = "";
// Entity deduplication is LOW complexity (pattern matching, similarity comparison)
await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
}, undefined, 'low');
await makeModelCall(
false,
messages as CoreMessage[],
(text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
},
undefined,
"low",
);
// Step 5: Process LLM response
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
@@ -898,7 +931,10 @@ export class KnowledgeGraphService {
triples: Triple[],
episode: EpisodicNode,
previousEpisodes: EpisodicNode[],
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
tokenMetrics: {
high: { input: number; output: number; total: number };
low: { input: number; output: number; total: number };
},
): Promise<{
resolvedStatements: Triple[];
invalidatedStatements: string[];
@@ -911,7 +947,10 @@ export class KnowledgeGraphService {
}
// Step 1: Collect all potential matches for all triples at once
const allPotentialMatches: Map<string, Omit<StatementNode, "factEmbedding">[]> = new Map();
const allPotentialMatches: Map<
string,
Omit<StatementNode, "factEmbedding">[]
> = new Map();
const allExistingTripleData: Map<string, Triple> = new Map();
// For preparing the LLM context
@@ -971,7 +1010,8 @@ export class KnowledgeGraphService {
}
// Phase 3: Check related memories for contradictory statements
const previousEpisodesStatements: Omit<StatementNode, "factEmbedding">[] = [];
const previousEpisodesStatements: Omit<StatementNode, "factEmbedding">[] =
[];
await Promise.all(
previousEpisodes.map(async (episode) => {
@@ -1052,14 +1092,20 @@ export class KnowledgeGraphService {
let responseText = "";
// Statement resolution is LOW complexity (rule-based duplicate/contradiction detection)
await makeModelCall(false, messages, (text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
}, undefined, 'low');
await makeModelCall(
false,
messages,
(text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
},
undefined,
"low",
);
try {
// Extract the JSON response from the output tags
@@ -1134,90 +1180,6 @@ export class KnowledgeGraphService {
return { resolvedStatements, invalidatedStatements };
}
/**
* Add attributes to entity nodes based on the resolved statements
*/
private async addAttributesToEntities(
triples: Triple[],
episode: EpisodicNode,
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
): Promise<Triple[]> {
// Collect all unique entities from the triples
const entityMap = new Map<string, EntityNode>();
// Add all subjects, predicates, and objects to the map
triples.forEach((triple) => {
if (triple.subject) {
entityMap.set(triple.subject.uuid, triple.subject);
}
if (triple.predicate) {
entityMap.set(triple.predicate.uuid, triple.predicate);
}
if (triple.object) {
entityMap.set(triple.object.uuid, triple.object);
}
});
// Convert the map to an array of entities
const entities = Array.from(entityMap.values());
if (entities.length === 0) {
return triples; // No entities to process
}
// Prepare simplified context for the LLM
const context = {
episodeContent: episode.content,
entities: entities.map((entity) => ({
uuid: entity.uuid,
name: entity.name,
currentAttributes: entity.attributes || {},
})),
};
// Create a prompt for the LLM to extract attributes
const messages = extractAttributes(context);
let responseText = "";
// Attribute extraction is LOW complexity (simple key-value extraction)
await makeModelCall(false, messages as CoreMessage[], (text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
}, undefined, 'low');
try {
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
if (outputMatch && outputMatch[1]) {
responseText = outputMatch[1].trim();
}
// Parse the LLM response
const responseData = JSON.parse(responseText);
const updatedEntities = responseData.entities || [];
// Update entity attributes and save them
for (const updatedEntity of updatedEntities) {
const entity = entityMap.get(updatedEntity.uuid);
if (entity) {
// Merge the existing attributes with the new ones
entity.attributes = {
...updatedEntity.attributes,
};
}
}
logger.info(`Updated attributes for ${updatedEntities.length} entities`);
} catch (error) {
logger.error("Error processing entity attributes", { error });
}
return triples;
}
/**
* Normalize an episode by extracting entities and creating nodes and statements
*/
@@ -1226,7 +1188,10 @@ export class KnowledgeGraphService {
source: string,
userId: string,
prisma: PrismaClient,
tokenMetrics: { high: { input: number; output: number; total: number }; low: { input: number; output: number; total: number } },
tokenMetrics: {
high: { input: number; output: number; total: number };
low: { input: number; output: number; total: number };
},
episodeTimestamp?: Date,
sessionContext?: string,
contentType?: EpisodeType,
@@ -1263,14 +1228,20 @@ export class KnowledgeGraphService {
: normalizePrompt(context);
// Normalization is LOW complexity (text cleaning and standardization)
let responseText = "";
await makeModelCall(false, messages, (text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
}, undefined, 'high');
await makeModelCall(
false,
messages,
(text, _model, usage) => {
responseText = text;
if (usage) {
tokenMetrics.low.input += usage.promptTokens;
tokenMetrics.low.output += usage.completionTokens;
tokenMetrics.low.total += usage.totalTokens;
}
},
undefined,
"high",
);
let normalizedEpisodeBody = "";
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
if (outputMatch && outputMatch[1]) {

View File

@@ -8,8 +8,8 @@ import { type CoreMessage } from "ai";
* Extract entities from content using unified approach (works for both conversations and documents)
*/
export const extractEntities = (
context: Record<string, any>,
extractionMode: 'conversation' | 'document' = 'conversation'
context: Record<string, any>,
extractionMode: "conversation" | "document" = "conversation",
): CoreMessage[] => {
const sysPrompt = `You are an AI assistant that extracts entity nodes from conversational messages for a reified knowledge graph.
Your primary task is to extract all significant entities mentioned in the conversation, treating both concrete entities and type/concept entities as first-class nodes.
@@ -204,13 +204,18 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
4. End with exactly: </output>
5. NO additional text, NO comments, NO explanations`;
const contentLabel = extractionMode === 'conversation' ? 'CURRENT EPISODE' : 'TEXT';
const contentLabel =
extractionMode === "conversation" ? "CURRENT EPISODE" : "TEXT";
const userPrompt = `
${extractionMode === 'conversation' ? `<PREVIOUS EPISODES>
${
extractionMode === "conversation"
? `<PREVIOUS EPISODES>
${JSON.stringify(context.previousEpisodes || [], null, 2)}
</PREVIOUS EPISODES>
` : ''}<${contentLabel}>
`
: ""
}<${contentLabel}>
${context.episodeContent}
</${contentLabel}>
@@ -222,7 +227,6 @@ ${context.episodeContent}
];
};
/**
* Resolve entity duplications
*/
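
With the reformatted signature, the second parameter defaults to "conversation", and passing "document" drops the <PREVIOUS EPISODES> block and relabels the content tag. A usage sketch with invented field values:

// Hypothetical call; the context values are invented.
const documentMessages = extractEntities(
  {
    episodeContent: "Ada joined Initech as a staff engineer.",
    previousEpisodes: [],
  },
  "document", // content is wrapped in <TEXT> instead of <CURRENT EPISODE>
);
// documentMessages is a CoreMessage[] ready to hand to makeModelCall.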
@@ -325,63 +329,3 @@ ${JSON.stringify(context.extracted_nodes, null, 2)}
},
];
};
export const extractAttributes = (
context: Record<string, any>,
): CoreMessage[] => {
const sysPrompt = `
You are an AI assistant that extracts and enhances entity attributes based on context.
Your task is to analyze entities and provide appropriate attribute values based on available information.
For each entity:
1. Analyze the context to identify relevant attributes for the entity
2. Extract appropriate values from the episode content if available
3. Focus on factual, descriptive attributes rather than type classifications
4. Give empty attributes object ({}) when there are no attributes to update
5. Only include attributes that you're adding or modifying
6. I'll merge your new attributes with existing ones, so only provide updates
Common attribute types to consider:
- Descriptive properties (color, size, status, etc.)
- Relational context (role, position, relationship, etc.)
- Temporal information (duration, frequency, etc.)
- Qualitative aspects (importance, preference, etc.)
## CRITICAL OUTPUT FORMAT REQUIREMENTS:
**YOU MUST STRICTLY FOLLOW THESE FORMAT RULES:**
1. **ALWAYS use <output> tags** - Never use any other tag format
2. **ONLY output valid JSON** within the <output> tags
3. **NO additional text** before or after the <output> tags
4. **NO comments** inside the JSON
5. **REQUIRED structure:** Must follow exact JSON schema shown below
<output>
{
"entities": [
{
"uuid": "entity-uuid",
"attributes": {
"attributeName1": "value1",
"attributeName2": "value2"
}
}
]
}
</output>`;
const userPrompt = `
<ENTITIES>
${JSON.stringify(context.entities, null, 2)}
</ENTITIES>
<EPISODE_CONTENT>
${context.episodeContent}
</EPISODE_CONTENT>
Based on the above information, please extract and enhance attributes for each entity based on the context. Return only the uuid and updated attributes for each entity.`;
return [
{ role: "system", content: sysPrompt },
{ role: "user", content: userPrompt },
];
};
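
The system prompt above promises the model that its new attributes will be merged with existing ones. A minimal sketch of that merge contract, assuming flat string-keyed attribute records; the helper below is illustrative, not part of the codebase:

// Hypothetical merge consistent with the prompt's stated contract.
type Attributes = Record<string, string>;

function mergeAttributes(existing: Attributes, updates: Attributes): Attributes {
  return { ...existing, ...updates }; // updates win; untouched keys survive
}

console.log(mergeAttributes({ role: "engineer" }, { status: "active" }));
// { role: "engineer", status: "active" }

Note that the removed addAttributesToEntities assigned { ...updatedEntity.attributes } directly, so in practice existing attributes were replaced rather than merged.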