mirror of
https://github.com/eliasstepanik/core.git
synced 2026-01-11 09:58:28 +00:00
fix: skip unchanged docs, and enhance entity extraction prompts
This commit is contained in:
parent
840ca64174
commit
a4b6a4f984
@ -4,7 +4,7 @@ import type { DocumentNode } from "@core/types";
|
|||||||
|
|
||||||
export interface DifferentialDecision {
|
export interface DifferentialDecision {
|
||||||
shouldUseDifferential: boolean;
|
shouldUseDifferential: boolean;
|
||||||
strategy: "full_reingest" | "chunk_level_diff" | "new_document";
|
strategy: "full_reingest" | "chunk_level_diff" | "new_document" | "skip_processing";
|
||||||
reason: string;
|
reason: string;
|
||||||
changedChunkIndices: number[];
|
changedChunkIndices: number[];
|
||||||
changePercentage: number;
|
changePercentage: number;
|
||||||
@ -59,7 +59,7 @@ export class DocumentDifferentialService {
|
|||||||
if (existingDocument.contentHash === newChunkedDocument.contentHash) {
|
if (existingDocument.contentHash === newChunkedDocument.contentHash) {
|
||||||
return {
|
return {
|
||||||
shouldUseDifferential: false,
|
shouldUseDifferential: false,
|
||||||
strategy: "full_reingest", // No changes detected
|
strategy: "skip_processing", // No changes detected
|
||||||
reason: "Document content unchanged",
|
reason: "Document content unchanged",
|
||||||
changedChunkIndices: [],
|
changedChunkIndices: [],
|
||||||
changePercentage: 0,
|
changePercentage: 0,
|
||||||
|
|||||||
@ -248,3 +248,75 @@ export async function getDocumentVersions(
|
|||||||
};
|
};
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Convert a count value read from a query record into a plain JS number.
 *
 * Accepts a `number`, a `bigint`, or any object exposing a `toNumber()`
 * method (NOTE(review): presumably the graph driver's integer wrapper —
 * confirm against how `runQuery` is configured). Anything else, including
 * `null`, `undefined` and `NaN`, is reported as 0.
 */
function toDeletionCount(value: unknown): number {
  if (typeof value === "number") {
    return Number.isNaN(value) ? 0 : value;
  }
  if (typeof value === "bigint") {
    return Number(value);
  }
  if (typeof value === "object" && value !== null) {
    const candidate = value as { toNumber?: unknown };
    if (typeof candidate.toNumber === "function") {
      return Number((candidate.toNumber as () => unknown).call(value));
    }
  }
  return 0;
}

/**
 * Delete a document together with its episodes, their statements, and any
 * entities that would be left unreferenced, using a single Cypher query.
 *
 * The query matches the `Document` by `uuid`, follows `CONTAINS_CHUNK` to its
 * episodes, `CONTAINS` to their statements and `REFERENCES` to the entities.
 * An entity is deleted only when no statement outside this document's
 * statement set still references it.
 *
 * @param documentUuid - `uuid` property of the `Document` node to delete.
 * @returns Counts of deleted documents, episodes, statements and entities.
 *   All counts are 0 when the query returns no row (no matching document).
 * @throws Re-throws any error raised by `runQuery` after logging it.
 */
export async function deleteDocument(documentUuid: string): Promise<{
  documentsDeleted: number;
  episodesDeleted: number;
  statementsDeleted: number;
  entitiesDeleted: number;
}> {
  const query = `
    MATCH (d:Document {uuid: $documentUuid})

    // Get all related data first
    OPTIONAL MATCH (d)-[:CONTAINS_CHUNK]->(e:Episode)
    OPTIONAL MATCH (e)-[:CONTAINS]->(s:Statement)
    OPTIONAL MATCH (s)-[:REFERENCES]->(entity:Entity)

    // Count entities that will become orphaned
    WITH d, collect(DISTINCT e) as episodes, collect(DISTINCT s) as statements, collect(DISTINCT entity) as entities

    // UNWIND of an empty list produces zero rows, which would silently skip
    // every delete below; unwind a single null instead so the row survives.
    UNWIND CASE WHEN size(entities) = 0 THEN [null] ELSE entities END AS entity
    OPTIONAL MATCH (entity)<-[:REFERENCES]-(otherStmt:Statement)
    WHERE NOT otherStmt IN statements

    WITH d, episodes, statements,
         collect(CASE WHEN otherStmt IS NULL THEN entity ELSE null END) as orphanedEntities

    // Delete statements (breaks references to entities)
    FOREACH (stmt IN statements | DETACH DELETE stmt)

    // Delete orphaned entities only (filter nulls first)
    WITH d, episodes, statements, [entity IN orphanedEntities WHERE entity IS NOT NULL] as validOrphanedEntities
    FOREACH (entity IN validOrphanedEntities | DETACH DELETE entity)

    // Delete episodes
    FOREACH (episode IN episodes | DETACH DELETE episode)

    // Delete document
    DETACH DELETE d

    RETURN
      1 as documentsDeleted,
      size(episodes) as episodesDeleted,
      size(statements) as statementsDeleted,
      size(validOrphanedEntities) as entitiesDeleted
  `;

  try {
    const result = await runQuery(query, { documentUuid });
    const [record] = result;

    // No row means the MATCH found no document with this uuid.
    if (!record) {
      return {
        documentsDeleted: 0,
        episodesDeleted: 0,
        statementsDeleted: 0,
        entitiesDeleted: 0,
      };
    }

    return {
      documentsDeleted: toDeletionCount(record.get("documentsDeleted")),
      episodesDeleted: toDeletionCount(record.get("episodesDeleted")),
      statementsDeleted: toDeletionCount(record.get("statementsDeleted")),
      entitiesDeleted: toDeletionCount(record.get("entitiesDeleted")),
    };
  } catch (error) {
    console.error("Error deleting document:", error);
    throw error;
  }
}
|
||||||
|
|||||||
@ -384,8 +384,10 @@ export class KnowledgeGraphService {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save triples in parallel for better performance
|
// Process triples sequentially to avoid race conditions
|
||||||
await Promise.all(updatedTriples.map((triple) => saveTriple(triple)));
|
for (const triple of updatedTriples) {
|
||||||
|
await saveTriple(triple);
|
||||||
|
}
|
||||||
|
|
||||||
const saveTriplesTime = Date.now();
|
const saveTriplesTime = Date.now();
|
||||||
logger.log(`Saved triples in ${saveTriplesTime - updatedTriplesTime} ms`);
|
logger.log(`Saved triples in ${saveTriplesTime - updatedTriplesTime} ms`);
|
||||||
|
|||||||
@ -31,19 +31,33 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
|
|||||||
- For pronouns that refer to named entities, extract them as separate Alias entities.
|
- For pronouns that refer to named entities, extract them as separate Alias entities.
|
||||||
- **TYPE/CONCEPT ENTITIES**: When text contains "X is a Y" statements, extract BOTH X and Y as separate entities.
|
- **TYPE/CONCEPT ENTITIES**: When text contains "X is a Y" statements, extract BOTH X and Y as separate entities.
|
||||||
|
|
||||||
2. **Type and Concept Entity Extraction**:
|
2. **IMPLICIT ACTOR EXTRACTION**:
|
||||||
|
- **EXPERIENCE AGENTS**: Extract the entity who performs actions, makes decisions, or has subjective experiences
|
||||||
|
- **PERSPECTIVE HOLDERS**: Extract entities behind opinions, preferences, memories, and evaluations
|
||||||
|
- **DOCUMENT ACTORS**: For personal content (journals, notes, reports), extract the implied author/creator
|
||||||
|
- **PRONOUN RESOLUTION**: Extract the entity represented by first-person pronouns in narrative content
|
||||||
|
- **ACTION SUBJECTS**: When actions are described without explicit subjects, infer and extract the acting entity
|
||||||
|
|
||||||
|
**Detection Signals**:
|
||||||
|
- Action descriptions without explicit subjects
|
||||||
|
- Opinion/evaluation expressions
|
||||||
|
- Decision-making language
|
||||||
|
- Personal experience descriptions
|
||||||
|
- Memory/reflection statements
|
||||||
|
|
||||||
|
3. **Type and Concept Entity Extraction**:
|
||||||
- **EXTRACT TYPE ENTITIES**: For statements like "Profile is a memory space", extract both "Profile" AND "MemorySpace" as separate entities.
|
- **EXTRACT TYPE ENTITIES**: For statements like "Profile is a memory space", extract both "Profile" AND "MemorySpace" as separate entities.
|
||||||
- **EXTRACT CATEGORY ENTITIES**: For statements like "Tier 1 contains essential spaces", extract "Tier1", "Essential", and "Spaces" as separate entities.
|
- **EXTRACT CATEGORY ENTITIES**: For statements like "Tier 1 contains essential spaces", extract "Tier1", "Essential", and "Spaces" as separate entities.
|
||||||
- **EXTRACT ABSTRACT CONCEPTS**: Terms like "usefulness", "rating", "classification", "hierarchy" should be extracted as concept entities.
|
- **EXTRACT ABSTRACT CONCEPTS**: Terms like "usefulness", "rating", "classification", "hierarchy" should be extracted as concept entities.
|
||||||
- **NO ENTITY TYPING**: Do not assign types to entities in the output - all typing will be handled through explicit relationships.
|
- **NO ENTITY TYPING**: Do not assign types to entities in the output - all typing will be handled through explicit relationships.
|
||||||
|
|
||||||
3. **Exclusions**:
|
4. **Exclusions**:
|
||||||
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
|
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
|
||||||
- **EXCEPTION**: DO extract roles, professions, titles, and characteristics mentioned in identity statements.
|
- **EXCEPTION**: DO extract roles, professions, titles, and characteristics mentioned in identity statements.
|
||||||
- Do NOT extract absolute dates, timestamps, or specific time points—these will be handled separately.
|
- Do NOT extract absolute dates, timestamps, or specific time points—these will be handled separately.
|
||||||
- Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm").
|
- Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm").
|
||||||
|
|
||||||
4. **Entity Name Extraction**:
|
5. **Entity Name Extraction**:
|
||||||
- Extract ONLY the core entity name, WITHOUT any descriptors or qualifiers
|
- Extract ONLY the core entity name, WITHOUT any descriptors or qualifiers
|
||||||
- When text mentions "Tesla car", extract TWO entities: "Tesla" AND "Car"
|
- When text mentions "Tesla car", extract TWO entities: "Tesla" AND "Car"
|
||||||
- When text mentions "memory space system", extract "Memory", "Space", AND "System" as separate entities
|
- When text mentions "memory space system", extract "Memory", "Space", AND "System" as separate entities
|
||||||
@ -52,7 +66,7 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
|
|||||||
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
|
- **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John")
|
||||||
- **CONCEPT NORMALIZATION**: Convert to singular form where appropriate ("spaces" → "Space")
|
- **CONCEPT NORMALIZATION**: Convert to singular form where appropriate ("spaces" → "Space")
|
||||||
|
|
||||||
5. **Temporal and Relationship Context Extraction**:
|
6. **Temporal and Relationship Context Extraction**:
|
||||||
- EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years")
|
- EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years")
|
||||||
- EXTRACT temporal context that anchors relationships ("since moving", "after graduation", "during college")
|
- EXTRACT temporal context that anchors relationships ("since moving", "after graduation", "during college")
|
||||||
- EXTRACT relationship qualifiers ("close friends", "support system", "work team", "family members")
|
- EXTRACT relationship qualifiers ("close friends", "support system", "work team", "family members")
|
||||||
|
|||||||
@ -12,7 +12,65 @@ export const extractStatements = (
|
|||||||
return [
|
return [
|
||||||
{
|
{
|
||||||
role: "system",
|
role: "system",
|
||||||
content: `You are a knowledge graph expert who extracts NEW factual statements from text as subject-predicate-object triples.
|
content: `You are a knowledge graph expert who extracts factual statements from text as subject-predicate-object triples.
|
||||||
|
|
||||||
|
## PHASE 1: FOUNDATIONAL RELATIONSHIPS (HIGHEST PRIORITY)
|
||||||
|
Extract the basic semantic backbone that answers: WHO, WHAT, WHERE, WHEN, WHY, HOW
|
||||||
|
|
||||||
|
### 1A: ACTOR-ACTION RELATIONSHIPS
|
||||||
|
- Subject performs action: "Entity" "performed" "Action"
|
||||||
|
- Subject experiences state: "Entity" "experienced" "State"
|
||||||
|
- Subject has attribute: "Entity" "has" "Property"
|
||||||
|
- Subject creates/produces: "Entity" "created" "Object"
|
||||||
|
|
||||||
|
### 1B: SPATIAL & HIERARCHICAL RELATIONSHIPS
|
||||||
|
- Location membership: "Entity" "located_in" "Location"
|
||||||
|
- Categorical membership: "Entity" "is_a" "Category"
|
||||||
|
- Hierarchical structure: "Entity" "part_of" "System"
|
||||||
|
- Containment: "Container" "contains" "Item"
|
||||||
|
|
||||||
|
### 1C: TEMPORAL & SEQUENTIAL RELATIONSHIPS
|
||||||
|
- Duration facts: "Event" "lasted" "Duration"
|
||||||
|
- Sequence facts: "Event" "occurred_before" "Event"
|
||||||
|
- Temporal anchoring: "Event" "occurred_during" "Period"
|
||||||
|
- Timing: "Action" "happened_on" "Date"
|
||||||
|
|
||||||
|
### 1D: SUBJECTIVE & EVALUATIVE RELATIONSHIPS
|
||||||
|
- Opinions: "Subject" "opinion_about" "Object"
|
||||||
|
- Preferences: "Subject" "prefers" "Object"
|
||||||
|
- Evaluations: "Subject" "rated" "Object"
|
||||||
|
- Desires: "Subject" "wants" "Object"
|
||||||
|
|
||||||
|
## SYSTEMATIC EXTRACTION METHODOLOGY
|
||||||
|
For each entity, systematically check these common patterns:
|
||||||
|
|
||||||
|
**Type/Category Patterns**: Entity → is_a → Type
|
||||||
|
**Ownership Patterns**: Actor → owns/controls → Resource
|
||||||
|
**Participation Patterns**: Actor → participates_in → Event
|
||||||
|
**Location Patterns**: Entity → located_in/part_of → Place
|
||||||
|
**Temporal Patterns**: Event → occurred_during → TimeFrame
|
||||||
|
**Rating/Measurement Patterns**: Subject → rated/measured → Object
|
||||||
|
**Reference Patterns**: Document → references → Entity
|
||||||
|
**Employment Patterns**: Person → works_for → Organization
|
||||||
|
|
||||||
|
## RELATIONSHIP QUALITY HIERARCHY
|
||||||
|
|
||||||
|
**ESSENTIAL (Extract Always)**:
|
||||||
|
- Categorical membership (is_a, type_of)
|
||||||
|
- Spatial relationships (located_in, part_of)
|
||||||
|
- Actor-action relationships (performed, experienced, created)
|
||||||
|
- Ownership/control relationships (owns, controls, manages)
|
||||||
|
- Employment relationships (works_for, employed_by)
|
||||||
|
|
||||||
|
**VALUABLE (Extract When Present)**:
|
||||||
|
- Temporal sequences and durations
|
||||||
|
- Subjective opinions and evaluations
|
||||||
|
- Cross-references and citations
|
||||||
|
- Participation and attendance
|
||||||
|
|
||||||
|
**CONTEXTUAL (Extract If Space Permits)**:
|
||||||
|
- Complex multi-hop inferences
|
||||||
|
- Implicit relationships requiring interpretation
|
||||||
|
|
||||||
CRITICAL REQUIREMENT:
|
CRITICAL REQUIREMENT:
|
||||||
- You MUST ONLY use entities from the AVAILABLE ENTITIES list as subjects and objects.
|
- You MUST ONLY use entities from the AVAILABLE ENTITIES list as subjects and objects.
|
||||||
@ -30,54 +88,20 @@ RELATIONSHIP FORMATION RULES:
|
|||||||
2. **PRIMARY-EXPANDED**: Only if the expanded entity is mentioned in the episode content
|
2. **PRIMARY-EXPANDED**: Only if the expanded entity is mentioned in the episode content
|
||||||
3. **EXPANDED-EXPANDED**: Avoid unless there's explicit connection in the episode
|
3. **EXPANDED-EXPANDED**: Avoid unless there's explicit connection in the episode
|
||||||
|
|
||||||
FOCUS: Create relationships that ADD VALUE to understanding the current episode, not just because entities are available.
|
Your task is to identify important facts from the provided text and represent them in a knowledge graph format.
|
||||||
|
|
||||||
## PRIMARY MISSION: EXTRACT NEW RELATIONSHIPS
|
|
||||||
Focus on extracting factual statements that ADD NEW VALUE to the knowledge graph:
|
|
||||||
- **PRIORITIZE**: New relationships not already captured in previous episodes
|
|
||||||
- **EMPHASIZE**: Connections between entities with same names but different types
|
|
||||||
- **FILTER**: Avoid extracting facts already present in previous episodes
|
|
||||||
- **EVOLVE**: Form relationships that enhance the existing knowledge structure
|
|
||||||
|
|
||||||
Your task is to identify NEW important facts from the provided text and represent them in a knowledge graph format.
|
|
||||||
|
|
||||||
Follow these instructions:
|
Follow these instructions:
|
||||||
|
|
||||||
1. **ANALYZE PREVIOUS EPISODES**: Review previous episodes to understand what relationships already exist
|
1. **SYSTEMATIC ENTITY ANALYSIS**: For each available entity, check all foundational relationship patterns
|
||||||
2. **REVIEW AVAILABLE ENTITIES**: Carefully examine the AVAILABLE ENTITIES list - these are the ONLY entities you can use as subjects and objects
|
2. **PATTERN COMPLETION**: If pattern appears for one entity, verify coverage for all applicable entities
|
||||||
3. **IDENTIFY SAME-NAME ENTITIES**: Look for entities with identical names but different types - these often represent natural relationships that should be explicitly connected
|
3. **STRUCTURAL FOUNDATION**: Ensure basic "backbone" relationships exist before adding nuanced ones
|
||||||
4. **EXTRACT NEW RELATIONSHIPS**: Identify factual statements that can be expressed using ONLY available entities AND are NOT already captured in previous episodes
|
4. **REVIEW AVAILABLE ENTITIES**: Carefully examine the AVAILABLE ENTITIES list - these are the ONLY entities you can use as subjects and objects
|
||||||
5. For each NEW valid statement, provide:
|
5. **IDENTIFY SAME-NAME ENTITIES**: Look for entities with identical names but different types - these often represent natural relationships that should be explicitly connected
|
||||||
|
6. For each valid statement, provide:
|
||||||
- source: The subject entity (MUST be from AVAILABLE ENTITIES)
|
- source: The subject entity (MUST be from AVAILABLE ENTITIES)
|
||||||
- predicate: The relationship type (can be a descriptive phrase)
|
- predicate: The relationship type (can be a descriptive phrase)
|
||||||
- target: The object entity (MUST be from AVAILABLE ENTITIES)
|
- target: The object entity (MUST be from AVAILABLE ENTITIES)
|
||||||
|
|
||||||
EXTRACT NEW MEANINGFUL RELATIONSHIPS AND CHARACTERISTICS:
|
|
||||||
- Extract meaningful relationships between available entities that are NOT already captured in previous episodes
|
|
||||||
- Extract individual entity characteristics, roles, and properties as standalone facts
|
|
||||||
- Use predicates that accurately describe new relationships between entities
|
|
||||||
- Be creative but precise in identifying NEW relationships - focus on value-adding connections
|
|
||||||
- **HIGHEST PRIORITY**: Entities with identical names but different types MUST be connected with explicit relationship statements
|
|
||||||
- **MANDATORY**: When you find entities like "John (Person)" and "John (Company)", create explicit relationships such as "John" "owns" "John" or "John" "founded" "John"
|
|
||||||
- **ROLE/CHARACTERISTIC EXTRACTION**: Always extract roles, professions, titles, and key characteristics as separate statements
|
|
||||||
- Look for both explicit and implicit NEW relationships mentioned in the text
|
|
||||||
- **FILTER OUT**: Relationships already established in previous episodes unless they represent updates or changes
|
|
||||||
- Common relationship types include (but are not limited to):
|
|
||||||
* **Roles and professions** (e.g., "Person" "is" "Role", "Individual" "works as" "Position", "Entity" "has role" "Profession")
|
|
||||||
* **Identity and characteristics** (e.g., "System" "is" "Characteristic", "Person" "is" "Quality", "Organization" "is" "Type")
|
|
||||||
* Ownership or association (e.g., "Alice" "owns" "Restaurant")
|
|
||||||
* Participation or attendance (e.g., "Team" "participates in" "Tournament")
|
|
||||||
* Personal connections (e.g., "Sarah" "works with" "Michael")
|
|
||||||
* Aliases and alternative names (e.g., "Robert" "is also known as" "Bob")
|
|
||||||
* Locations and spatial relationships (e.g., "Office" "located in" "Building")
|
|
||||||
* Characteristics and properties (e.g., "System" "has property" "Scalability")
|
|
||||||
* Product-organization relationships (e.g., "Software" "developed by" "Company")
|
|
||||||
* Technical dependencies and usage (e.g., "Application" "uses" "Database")
|
|
||||||
* Hierarchical relationships (e.g., "Manager" "supervises" "Employee")
|
|
||||||
* Duration relationships (e.g., "Caroline" "has known" "friends" [duration: "4 years"])
|
|
||||||
* Temporal sequence relationships (e.g., "Caroline" "met" "friends" [context: "since moving"])
|
|
||||||
* Contextual support relationships (e.g., "friends" "supported" "Caroline" [context: "during breakup"])
|
|
||||||
|
|
||||||
## SAME-NAME ENTITY RELATIONSHIP FORMATION
|
## SAME-NAME ENTITY RELATIONSHIP FORMATION
|
||||||
When entities share identical names but have different types, CREATE explicit relationship statements:
|
When entities share identical names but have different types, CREATE explicit relationship statements:
|
||||||
- **Person-Organization**: "John (Person)" → "owns", "founded", "works for", or "leads" → "John (Company)"
|
- **Person-Organization**: "John (Person)" → "owns", "founded", "works for", or "leads" → "John (Company)"
|
||||||
@ -100,12 +124,12 @@ EXAMPLES of correct Duration/TemporalContext usage:
|
|||||||
* DO NOT CREATE: "Caroline" "relates to" "4 years" (Duration as object)
|
* DO NOT CREATE: "Caroline" "relates to" "4 years" (Duration as object)
|
||||||
* DO NOT CREATE: "since moving" "describes" "friendship" (TemporalContext as subject)
|
* DO NOT CREATE: "since moving" "describes" "friendship" (TemporalContext as subject)
|
||||||
|
|
||||||
## PREVIOUS EPISODE FILTERING
|
## EXTRACTION COMPLETENESS MANDATE
|
||||||
Before creating any relationship statement:
|
- **EXTRACT OBVIOUS FACTS**: Basic relationships are STRUCTURAL FOUNDATIONS, not redundant noise
|
||||||
- **CHECK**: Review previous episodes to see if this exact relationship already exists
|
- **PRIORITIZE SIMPLE OVER COMPLEX**: "X is_in Y" is more valuable than "X contextually_relates_to Y"
|
||||||
- **SKIP**: Do not create statements that duplicate existing relationships
|
- **QUANTITY OVER NOVELTY**: Comprehensive coverage beats selective "interesting" facts
|
||||||
- **ENHANCE**: Only create statements if they add new information or represent updates
|
- **SYSTEMATIC ENUMERATION**: If pattern exists for one entity, check ALL entities for same pattern
|
||||||
- **FOCUS**: Prioritize completely new connections not represented in the knowledge graph
|
- Only skip exact duplicate statements, not similar relationship types
|
||||||
|
|
||||||
CRITICAL TEMPORAL INFORMATION HANDLING:
|
CRITICAL TEMPORAL INFORMATION HANDLING:
|
||||||
- For events with specific dates/times, ALWAYS capture temporal information in statement attributes
|
- For events with specific dates/times, ALWAYS capture temporal information in statement attributes
|
||||||
|
|||||||
@ -512,7 +512,7 @@ export async function applyCohereReranking(
|
|||||||
cohereScore: result.relevanceScore,
|
cohereScore: result.relevanceScore,
|
||||||
cohereRank: index + 1,
|
cohereRank: index + 1,
|
||||||
}))
|
}))
|
||||||
.filter((result) => result.cohereScore > 0.3);
|
.filter((result) => result.cohereScore >= 0.1);
|
||||||
|
|
||||||
const responseTime = Date.now() - startTime;
|
const responseTime = Date.now() - startTime;
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|||||||
@ -87,6 +87,18 @@ export const ingestDocumentTask = task({
|
|||||||
documentSizeTokens: differentialDecision.documentSizeTokens,
|
documentSizeTokens: differentialDecision.documentSizeTokens,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Early return for unchanged documents
|
||||||
|
if (differentialDecision.strategy === "skip_processing") {
|
||||||
|
logger.log("Document content unchanged, skipping processing");
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
documentsProcessed: 1,
|
||||||
|
chunksProcessed: 0,
|
||||||
|
episodesCreated: 0,
|
||||||
|
entitiesExtracted: 0,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// Step 3: Save the new document version
|
// Step 3: Save the new document version
|
||||||
await saveDocument(document);
|
await saveDocument(document);
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user