mirror of
https://github.com/eliasstepanik/core.git
synced 2026-01-11 09:58:28 +00:00
feat: add normalize episode content before storage
This commit is contained in:
parent
56adc246c8
commit
02c7b90374
@ -6,6 +6,7 @@ export async function saveEpisode(episode: EpisodicNode): Promise<string> {
|
|||||||
MERGE (e:Episode {uuid: $uuid})
|
MERGE (e:Episode {uuid: $uuid})
|
||||||
ON CREATE SET
|
ON CREATE SET
|
||||||
e.content = $content,
|
e.content = $content,
|
||||||
|
e.originalContent = $originalContent,
|
||||||
e.contentEmbedding = $contentEmbedding,
|
e.contentEmbedding = $contentEmbedding,
|
||||||
e.type = $type,
|
e.type = $type,
|
||||||
e.source = $source,
|
e.source = $source,
|
||||||
@ -18,6 +19,7 @@ export async function saveEpisode(episode: EpisodicNode): Promise<string> {
|
|||||||
ON MATCH SET
|
ON MATCH SET
|
||||||
e.content = $content,
|
e.content = $content,
|
||||||
e.contentEmbedding = $contentEmbedding,
|
e.contentEmbedding = $contentEmbedding,
|
||||||
|
e.originalContent = $originalContent,
|
||||||
e.type = $type,
|
e.type = $type,
|
||||||
e.source = $source,
|
e.source = $source,
|
||||||
e.validAt = $validAt,
|
e.validAt = $validAt,
|
||||||
@ -30,6 +32,7 @@ export async function saveEpisode(episode: EpisodicNode): Promise<string> {
|
|||||||
const params = {
|
const params = {
|
||||||
uuid: episode.uuid,
|
uuid: episode.uuid,
|
||||||
content: episode.content,
|
content: episode.content,
|
||||||
|
originalContent: episode.originalContent,
|
||||||
source: episode.source,
|
source: episode.source,
|
||||||
type: episode.type,
|
type: episode.type,
|
||||||
userId: episode.userId || null,
|
userId: episode.userId || null,
|
||||||
@ -59,6 +62,7 @@ export async function getEpisode(uuid: string): Promise<EpisodicNode | null> {
|
|||||||
return {
|
return {
|
||||||
uuid: episode.uuid,
|
uuid: episode.uuid,
|
||||||
content: episode.content,
|
content: episode.content,
|
||||||
|
originalContent: episode.originalContent,
|
||||||
contentEmbedding: episode.contentEmbedding,
|
contentEmbedding: episode.contentEmbedding,
|
||||||
type: episode.type,
|
type: episode.type,
|
||||||
source: episode.source,
|
source: episode.source,
|
||||||
@ -112,6 +116,7 @@ export async function getRecentEpisodes(params: {
|
|||||||
return {
|
return {
|
||||||
uuid: episode.uuid,
|
uuid: episode.uuid,
|
||||||
content: episode.content,
|
content: episode.content,
|
||||||
|
originalContent: episode.originalContent,
|
||||||
contentEmbedding: episode.contentEmbedding,
|
contentEmbedding: episode.contentEmbedding,
|
||||||
type: episode.type,
|
type: episode.type,
|
||||||
source: episode.source,
|
source: episode.source,
|
||||||
|
|||||||
@ -64,10 +64,14 @@ export async function saveTriple(triple: Triple): Promise<string> {
|
|||||||
MATCH (object:Entity {uuid: $objectUuid, userId: $userId})
|
MATCH (object:Entity {uuid: $objectUuid, userId: $userId})
|
||||||
MATCH (episode:Episode {uuid: $episodeUuid, userId: $userId})
|
MATCH (episode:Episode {uuid: $episodeUuid, userId: $userId})
|
||||||
|
|
||||||
CREATE (episode)-[:HAS_PROVENANCE {uuid: $provenanceEdgeUuid, createdAt: $createdAt}]->(statement)
|
MERGE (episode)-[prov:HAS_PROVENANCE]->(statement)
|
||||||
CREATE (statement)-[:HAS_SUBJECT {uuid: $subjectEdgeUuid, createdAt: $createdAt}]->(subject)
|
ON CREATE SET prov.uuid = $provenanceEdgeUuid, prov.createdAt = $createdAt
|
||||||
CREATE (statement)-[:HAS_PREDICATE {uuid: $predicateEdgeUuid, createdAt: $createdAt}]->(predicate)
|
MERGE (statement)-[subj:HAS_SUBJECT]->(subject)
|
||||||
CREATE (statement)-[:HAS_OBJECT {uuid: $objectEdgeUuid, createdAt: $createdAt}]->(object)
|
ON CREATE SET subj.uuid = $subjectEdgeUuid, subj.createdAt = $createdAt
|
||||||
|
MERGE (statement)-[pred:HAS_PREDICATE]->(predicate)
|
||||||
|
ON CREATE SET pred.uuid = $predicateEdgeUuid, pred.createdAt = $createdAt
|
||||||
|
MERGE (statement)-[obj:HAS_OBJECT]->(object)
|
||||||
|
ON CREATE SET obj.uuid = $objectEdgeUuid, obj.createdAt = $createdAt
|
||||||
|
|
||||||
RETURN statement.uuid as uuid
|
RETURN statement.uuid as uuid
|
||||||
`;
|
`;
|
||||||
@ -267,6 +271,7 @@ export async function getTripleForStatement({
|
|||||||
const provenance: EpisodicNode = {
|
const provenance: EpisodicNode = {
|
||||||
uuid: episodeProps.uuid,
|
uuid: episodeProps.uuid,
|
||||||
content: episodeProps.content,
|
content: episodeProps.content,
|
||||||
|
originalContent: episodeProps.originalContent,
|
||||||
source: episodeProps.source,
|
source: episodeProps.source,
|
||||||
type: episodeProps.type,
|
type: episodeProps.type,
|
||||||
createdAt: new Date(episodeProps.createdAt),
|
createdAt: new Date(episodeProps.createdAt),
|
||||||
|
|||||||
@ -1,7 +1,6 @@
|
|||||||
import { openai } from "@ai-sdk/openai";
|
import { openai } from "@ai-sdk/openai";
|
||||||
import { type CoreMessage, embed } from "ai";
|
import { type CoreMessage, embed } from "ai";
|
||||||
import {
|
import {
|
||||||
entityTypes,
|
|
||||||
EpisodeType,
|
EpisodeType,
|
||||||
LLMModelEnum,
|
LLMModelEnum,
|
||||||
type AddEpisodeParams,
|
type AddEpisodeParams,
|
||||||
@ -27,6 +26,8 @@ import {
|
|||||||
saveTriple,
|
saveTriple,
|
||||||
} from "./graphModels/statement";
|
} from "./graphModels/statement";
|
||||||
import { makeModelCall } from "~/lib/model.server";
|
import { makeModelCall } from "~/lib/model.server";
|
||||||
|
import { Apps, getNodeTypes, getNodeTypesString } from "~/utils/presets/nodes";
|
||||||
|
import { normalizePrompt } from "./prompts";
|
||||||
|
|
||||||
// Default number of previous episodes to retrieve for context
|
// Default number of previous episodes to retrieve for context
|
||||||
const DEFAULT_EPISODE_WINDOW = 5;
|
const DEFAULT_EPISODE_WINDOW = 5;
|
||||||
@ -61,10 +62,16 @@ export class KnowledgeGraphService {
|
|||||||
source: params.source,
|
source: params.source,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const normalizedEpisodeBody = await this.normalizeEpisodeBody(
|
||||||
|
params.episodeBody,
|
||||||
|
params.source,
|
||||||
|
);
|
||||||
|
|
||||||
// Step 2: Episode Creation - Create or retrieve the episode
|
// Step 2: Episode Creation - Create or retrieve the episode
|
||||||
const episode: EpisodicNode = {
|
const episode: EpisodicNode = {
|
||||||
uuid: crypto.randomUUID(),
|
uuid: crypto.randomUUID(),
|
||||||
content: params.episodeBody,
|
content: normalizedEpisodeBody,
|
||||||
|
originalContent: params.episodeBody,
|
||||||
source: params.source,
|
source: params.source,
|
||||||
type: params.type || EpisodeType.Text,
|
type: params.type || EpisodeType.Text,
|
||||||
createdAt: now,
|
createdAt: now,
|
||||||
@ -130,6 +137,12 @@ export class KnowledgeGraphService {
|
|||||||
episode: EpisodicNode,
|
episode: EpisodicNode,
|
||||||
previousEpisodes: EpisodicNode[],
|
previousEpisodes: EpisodicNode[],
|
||||||
): Promise<EntityNode[]> {
|
): Promise<EntityNode[]> {
|
||||||
|
// Get all app keys
|
||||||
|
const allAppEnumValues = Object.values(Apps);
|
||||||
|
|
||||||
|
// Get all node types
|
||||||
|
const entityTypes = getNodeTypes(allAppEnumValues);
|
||||||
|
|
||||||
// Use the prompt library to get the appropriate prompts
|
// Use the prompt library to get the appropriate prompts
|
||||||
const context = {
|
const context = {
|
||||||
episodeContent: episode.content,
|
episodeContent: episode.content,
|
||||||
@ -172,7 +185,9 @@ export class KnowledgeGraphService {
|
|||||||
name: entity.name,
|
name: entity.name,
|
||||||
type: entity.type,
|
type: entity.type,
|
||||||
attributes: entity.attributes || {},
|
attributes: entity.attributes || {},
|
||||||
nameEmbedding: await this.getEmbedding(entity.name),
|
nameEmbedding: await this.getEmbedding(
|
||||||
|
`${entity.type}: ${entity.name}`,
|
||||||
|
),
|
||||||
createdAt: new Date(),
|
createdAt: new Date(),
|
||||||
userId: episode.userId,
|
userId: episode.userId,
|
||||||
})),
|
})),
|
||||||
@ -652,4 +667,35 @@ export class KnowledgeGraphService {
|
|||||||
|
|
||||||
return { resolvedStatements, invalidatedStatements };
|
return { resolvedStatements, invalidatedStatements };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalize an episode by extracting entities and creating nodes and statements
|
||||||
|
*/
|
||||||
|
private async normalizeEpisodeBody(episodeBody: string, source: string) {
|
||||||
|
let appEnumValues: Apps[] = [];
|
||||||
|
if (Apps[source.toUpperCase() as keyof typeof Apps]) {
|
||||||
|
appEnumValues = [Apps[source.toUpperCase() as keyof typeof Apps]];
|
||||||
|
}
|
||||||
|
const entityTypes = getNodeTypesString(appEnumValues);
|
||||||
|
|
||||||
|
const context = {
|
||||||
|
episodeContent: episodeBody,
|
||||||
|
entityTypes: entityTypes,
|
||||||
|
source,
|
||||||
|
};
|
||||||
|
const messages = normalizePrompt(context);
|
||||||
|
let responseText = "";
|
||||||
|
await makeModelCall(false, LLMModelEnum.GPT41, messages, (text) => {
|
||||||
|
responseText = text;
|
||||||
|
});
|
||||||
|
let normalizedEpisodeBody = "";
|
||||||
|
const outputMatch = responseText.match(/<output>([\s\S]*?)<\/output>/);
|
||||||
|
if (outputMatch && outputMatch[1]) {
|
||||||
|
normalizedEpisodeBody = outputMatch[1].trim();
|
||||||
|
} else {
|
||||||
|
normalizedEpisodeBody = episodeBody;
|
||||||
|
}
|
||||||
|
|
||||||
|
return normalizedEpisodeBody;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,2 +1,3 @@
|
|||||||
export * from "./nodes";
|
export * from "./nodes";
|
||||||
export * from "./statements";
|
export * from "./statements";
|
||||||
|
export * from "./normalize";
|
||||||
|
|||||||
@ -4,30 +4,6 @@
|
|||||||
|
|
||||||
import { type CoreMessage } from "ai";
|
import { type CoreMessage } from "ai";
|
||||||
|
|
||||||
export interface ExtractedEntity {
|
|
||||||
name: string;
|
|
||||||
type: string;
|
|
||||||
attributes?: Record<string, any>;
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface ExtractedEntities {
|
|
||||||
entities: ExtractedEntity[];
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface MissedEntities {
|
|
||||||
missedEntities: string[];
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface EntityClassificationTriple {
|
|
||||||
uuid: string;
|
|
||||||
name: string;
|
|
||||||
type: string | null;
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface EntityClassification {
|
|
||||||
entityClassifications: EntityClassificationTriple[];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract entities from an episode using message-based approach
|
* Extract entities from an episode using message-based approach
|
||||||
*/
|
*/
|
||||||
@ -47,10 +23,14 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
|
|||||||
1. **Entity Identification**:
|
1. **Entity Identification**:
|
||||||
- Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the CURRENT EPISODE.
|
- Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the CURRENT EPISODE.
|
||||||
- **Exclude** entities mentioned only in the PREVIOUS EPISODES (they are for context only).
|
- **Exclude** entities mentioned only in the PREVIOUS EPISODES (they are for context only).
|
||||||
|
- For identity statements like "I am X" or "I'm X", extract BOTH the pronoun ("I") as a Alias entity AND the named entity (X).
|
||||||
|
- For pronouns that refer to named entities, extract them as separate Alias entities.
|
||||||
|
|
||||||
|
|
||||||
2. **Entity Classification**:
|
2. **Entity Classification**:
|
||||||
- Use the descriptions in ENTITY TYPES to classify each extracted entity.
|
- Use the descriptions in ENTITY TYPES to classify each extracted entity.
|
||||||
- Assign the appropriate type for each one.
|
- Assign the appropriate type for each one.
|
||||||
|
- Classify pronouns (I, me, you, etc.) as Alias entities.
|
||||||
|
|
||||||
3. **Exclusions**:
|
3. **Exclusions**:
|
||||||
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
|
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
|
||||||
@ -58,6 +38,7 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr
|
|||||||
|
|
||||||
4. **Formatting**:
|
4. **Formatting**:
|
||||||
- Be **explicit and unambiguous** in naming entities (e.g., use full names when available).
|
- Be **explicit and unambiguous** in naming entities (e.g., use full names when available).
|
||||||
|
- For pronouns, use the exact form as they appear in the text (e.g., "I", "me", "you").
|
||||||
|
|
||||||
|
|
||||||
Format your response as a JSON object with the following structure:
|
Format your response as a JSON object with the following structure:
|
||||||
@ -113,10 +94,13 @@ You are given a TEXT. Your task is to extract **entity nodes** mentioned **expli
|
|||||||
|
|
||||||
1. **Entity Identification**:
|
1. **Entity Identification**:
|
||||||
- Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the TEXT.
|
- Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the TEXT.
|
||||||
|
- For identity statements like "I am X" or "I'm X", extract BOTH the pronoun ("I") as a Alias entity AND the named entity (X).
|
||||||
|
- For pronouns that refer to named entities, extract them as separate Alias entities.
|
||||||
|
|
||||||
2. **Entity Classification**:
|
2. **Entity Classification**:
|
||||||
- Use the descriptions in ENTITY TYPES to classify each extracted entity.
|
- Use the descriptions in ENTITY TYPES to classify each extracted entity.
|
||||||
- Assign the appropriate type for each one.
|
- Assign the appropriate type for each one.
|
||||||
|
- Classify pronouns (I, me, you, etc.) as Alias entities.
|
||||||
|
|
||||||
3. **Exclusions**:
|
3. **Exclusions**:
|
||||||
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
|
- Do NOT extract entities representing relationships or actions (predicates will be handled separately).
|
||||||
@ -124,7 +108,7 @@ You are given a TEXT. Your task is to extract **entity nodes** mentioned **expli
|
|||||||
|
|
||||||
4. **Formatting**:
|
4. **Formatting**:
|
||||||
- Be **explicit and unambiguous** when naming entities (e.g., use full names when available).
|
- Be **explicit and unambiguous** when naming entities (e.g., use full names when available).
|
||||||
|
- For pronouns, use the exact form as they appear in the text (e.g., "I", "me", "you").
|
||||||
|
|
||||||
Format your response as a JSON object with the following structure:
|
Format your response as a JSON object with the following structure:
|
||||||
<output>
|
<output>
|
||||||
|
|||||||
47
apps/webapp/app/services/prompts/normalize.ts
Normal file
47
apps/webapp/app/services/prompts/normalize.ts
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
import { type CoreMessage } from "ai";
|
||||||
|
|
||||||
|
export const normalizePrompt = (
|
||||||
|
context: Record<string, any>,
|
||||||
|
): CoreMessage[] => {
|
||||||
|
const sysPrompt = `
|
||||||
|
You are a memory extraction system. Your task is to convert input information—such as user input, system events, or assistant actions—into clear, concise, third-person factual statements suitable for storage in a memory graph. These statements should be easily understandable and retrievable by any system or agent.
|
||||||
|
|
||||||
|
## Memory Processing Guidelines
|
||||||
|
- Always output memory statements in the third person (e.g., "User prefers...", "The assistant performed...", "The system detected...").
|
||||||
|
- Convert input information into clear, concise memory statements.
|
||||||
|
- Maintain a neutral, factual tone in all memory entries.
|
||||||
|
- Structure memories as factual statements, not questions.
|
||||||
|
- Include relevant context and temporal information when available.
|
||||||
|
|
||||||
|
## Node Entity Types
|
||||||
|
${context.entityTypes}
|
||||||
|
|
||||||
|
## Memory Graph Integration
|
||||||
|
- Each memory will be converted to a node in the memory graph.
|
||||||
|
- Include relevant relationships between memory items when possible.
|
||||||
|
- Specify temporal aspects when memories are time-sensitive.
|
||||||
|
- Format memories to support efficient retrieval by any system or agent.
|
||||||
|
|
||||||
|
When processing new information for memory storage, focus on extracting the core facts, preferences, and events that will be most useful for future reference by any system or agent.
|
||||||
|
|
||||||
|
<output>
|
||||||
|
{{processed_statement}}
|
||||||
|
</output>
|
||||||
|
`;
|
||||||
|
|
||||||
|
const userPrompt = `
|
||||||
|
<CONTENT>
|
||||||
|
${context.episodeContent}
|
||||||
|
</CONTENT>
|
||||||
|
|
||||||
|
<SOURCE>
|
||||||
|
${context.source}
|
||||||
|
</SOURCE>
|
||||||
|
|
||||||
|
`;
|
||||||
|
|
||||||
|
return [
|
||||||
|
{ role: "system", content: sysPrompt },
|
||||||
|
{ role: "user", content: userPrompt },
|
||||||
|
];
|
||||||
|
};
|
||||||
@ -4,6 +4,7 @@ import { embed } from "ai";
|
|||||||
import { logger } from "./logger.service";
|
import { logger } from "./logger.service";
|
||||||
import { applyCrossEncoderReranking, applyWeightedRRF } from "./search/rerank";
|
import { applyCrossEncoderReranking, applyWeightedRRF } from "./search/rerank";
|
||||||
import {
|
import {
|
||||||
|
getEpisodesByStatements,
|
||||||
performBfsSearch,
|
performBfsSearch,
|
||||||
performBM25Search,
|
performBM25Search,
|
||||||
performVectorSearch,
|
performVectorSearch,
|
||||||
@ -34,7 +35,7 @@ export class SearchService {
|
|||||||
query: string,
|
query: string,
|
||||||
userId: string,
|
userId: string,
|
||||||
options: SearchOptions = {},
|
options: SearchOptions = {},
|
||||||
): Promise<string[]> {
|
): Promise<{ episodes: string[]; facts: string[] }> {
|
||||||
// Default options
|
// Default options
|
||||||
const opts: Required<SearchOptions> = {
|
const opts: Required<SearchOptions> = {
|
||||||
limit: options.limit || 10,
|
limit: options.limit || 10,
|
||||||
@ -71,7 +72,11 @@ export class SearchService {
|
|||||||
const filteredResults = this.applyAdaptiveFiltering(rankedStatements, opts);
|
const filteredResults = this.applyAdaptiveFiltering(rankedStatements, opts);
|
||||||
|
|
||||||
// 3. Return top results
|
// 3. Return top results
|
||||||
return filteredResults.map((statement) => statement.fact);
|
const episodes = await getEpisodesByStatements(filteredResults);
|
||||||
|
return {
|
||||||
|
episodes: episodes.map((episode) => episode.content),
|
||||||
|
facts: filteredResults.map((statement) => statement.fact),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -84,12 +89,14 @@ export class SearchService {
|
|||||||
): StatementNode[] {
|
): StatementNode[] {
|
||||||
if (results.length === 0) return [];
|
if (results.length === 0) return [];
|
||||||
|
|
||||||
|
let isRRF = false;
|
||||||
// Extract scores from results
|
// Extract scores from results
|
||||||
const scoredResults = results.map((result) => {
|
const scoredResults = results.map((result) => {
|
||||||
// Find the score based on reranking strategy used
|
// Find the score based on reranking strategy used
|
||||||
let score = 0;
|
let score = 0;
|
||||||
if ((result as any).rrfScore !== undefined) {
|
if ((result as any).rrfScore !== undefined) {
|
||||||
score = (result as any).rrfScore;
|
score = (result as any).rrfScore;
|
||||||
|
isRRF = true;
|
||||||
} else if ((result as any).mmrScore !== undefined) {
|
} else if ((result as any).mmrScore !== undefined) {
|
||||||
score = (result as any).mmrScore;
|
score = (result as any).mmrScore;
|
||||||
} else if ((result as any).crossEncoderScore !== undefined) {
|
} else if ((result as any).crossEncoderScore !== undefined) {
|
||||||
@ -117,16 +124,31 @@ export class SearchService {
|
|||||||
const minScore = Math.min(...scores);
|
const minScore = Math.min(...scores);
|
||||||
const scoreRange = maxScore - minScore;
|
const scoreRange = maxScore - minScore;
|
||||||
|
|
||||||
// Define a minimum quality threshold as a fraction of the best score
|
let threshold = 0;
|
||||||
// This is relative to the query's score distribution rather than an absolute value
|
if (isRRF || scoreRange < 0.01) {
|
||||||
const relativeThreshold = options.scoreThreshold || 0.3; // 30% of the best score by default
|
// For RRF or other compressed score ranges, use a percentile-based approach
|
||||||
const absoluteMinimum = 0.1; // Absolute minimum threshold to prevent keeping very poor matches
|
// Keep top 70% (or whatever is specified in options) of results
|
||||||
|
const keepPercentage = 1 - (options.scoreThreshold || 0.3);
|
||||||
|
const keepCount = Math.max(
|
||||||
|
1,
|
||||||
|
Math.ceil(scoredResults.length * keepPercentage),
|
||||||
|
);
|
||||||
|
|
||||||
// Calculate the actual threshold as a percentage of the distance from min to max score
|
// Set threshold to the score of the last item we want to keep
|
||||||
const threshold = Math.max(
|
threshold =
|
||||||
absoluteMinimum,
|
keepCount < scoredResults.length
|
||||||
minScore + scoreRange * relativeThreshold,
|
? scoredResults[keepCount - 1].score
|
||||||
);
|
: 0;
|
||||||
|
} else {
|
||||||
|
// For normal score distributions, use the relative threshold approach
|
||||||
|
const relativeThreshold = options.scoreThreshold || 0.3;
|
||||||
|
const absoluteMinimum = 0.1;
|
||||||
|
|
||||||
|
threshold = Math.max(
|
||||||
|
absoluteMinimum,
|
||||||
|
minScore + scoreRange * relativeThreshold,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Filter out low-quality results
|
// Filter out low-quality results
|
||||||
const filteredResults = scoredResults
|
const filteredResults = scoredResults
|
||||||
|
|||||||
@ -89,7 +89,7 @@ export async function applyCrossEncoderReranking(
|
|||||||
{
|
{
|
||||||
role: "system",
|
role: "system",
|
||||||
content: `You are an expert tasked with determining whether the statement is relevant to the query
|
content: `You are an expert tasked with determining whether the statement is relevant to the query
|
||||||
Respond with "True" if PASSAGE is relevant to QUERY and "False" otherwise.`,
|
Respond with "True" if STATEMENT is relevant to QUERY and "False" otherwise.`,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
role: "user",
|
role: "user",
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
import type { EntityNode, StatementNode } from "@core/types";
|
import type { EntityNode, StatementNode, EpisodicNode } from "@core/types";
|
||||||
import type { SearchOptions } from "../search.server";
|
import type { SearchOptions } from "../search.server";
|
||||||
import type { Embedding } from "ai";
|
import type { Embedding } from "ai";
|
||||||
import { logger } from "../logger.service";
|
import { logger } from "../logger.service";
|
||||||
@ -219,3 +219,20 @@ export function combineAndDeduplicateStatements(
|
|||||||
).values(),
|
).values(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export async function getEpisodesByStatements(
|
||||||
|
statements: StatementNode[],
|
||||||
|
): Promise<EpisodicNode[]> {
|
||||||
|
const cypher = `
|
||||||
|
MATCH (s:Statement)<-[:HAS_PROVENANCE]-(e:Episode)
|
||||||
|
WHERE s.uuid IN $statementUuids
|
||||||
|
RETURN distinct e
|
||||||
|
`;
|
||||||
|
|
||||||
|
const params = {
|
||||||
|
statementUuids: statements.map((s) => s.uuid),
|
||||||
|
};
|
||||||
|
|
||||||
|
const records = await runQuery(cypher, params);
|
||||||
|
return records.map((record) => record.get("e").properties as EpisodicNode);
|
||||||
|
}
|
||||||
|
|||||||
@ -1,11 +1,13 @@
|
|||||||
export enum Apps {
|
export enum Apps {
|
||||||
LINEAR = "LINEAR",
|
LINEAR = "LINEAR",
|
||||||
SLACK = "SLACK",
|
SLACK = "SLACK",
|
||||||
|
SOL = "SOL",
|
||||||
}
|
}
|
||||||
|
|
||||||
export const AppNames = {
|
export const AppNames = {
|
||||||
[Apps.LINEAR]: "Linear",
|
[Apps.LINEAR]: "Linear",
|
||||||
[Apps.SLACK]: "Slack",
|
[Apps.SLACK]: "Slack",
|
||||||
|
[Apps.SOL]: "Sol",
|
||||||
} as const;
|
} as const;
|
||||||
|
|
||||||
// General node types that are common across all apps
|
// General node types that are common across all apps
|
||||||
@ -38,6 +40,33 @@ export const GENERAL_NODE_TYPES = {
|
|||||||
|
|
||||||
// App-specific node types
|
// App-specific node types
|
||||||
export const APP_NODE_TYPES = {
|
export const APP_NODE_TYPES = {
|
||||||
|
[Apps.SOL]: {
|
||||||
|
TASK: {
|
||||||
|
name: "Sol Task",
|
||||||
|
description:
|
||||||
|
"An independent unit of work in Sol, such as a task, bug report, or feature request. Tasks can be associated with lists or linked as subtasks to other tasks.",
|
||||||
|
},
|
||||||
|
LIST: {
|
||||||
|
name: "Sol List",
|
||||||
|
description:
|
||||||
|
"A flexible container in Sol for organizing content such as tasks, text, or references. Lists are used for task tracking, information collections, or reference materials.",
|
||||||
|
},
|
||||||
|
PREFERENCE: {
|
||||||
|
name: "Sol Preference",
|
||||||
|
description:
|
||||||
|
"A user-stated intent, setting, or configuration in Sol, such as preferred formats, notification settings, timezones, or other customizations. Preferences reflect how a user wants the system to behave.",
|
||||||
|
},
|
||||||
|
COMMAND: {
|
||||||
|
name: "Sol Command",
|
||||||
|
description:
|
||||||
|
"A user-issued command or trigger phrase, often starting with '/' or '@', that directs the system or an app to perform a specific action. Commands should always be extracted as distinct, important user actions.",
|
||||||
|
},
|
||||||
|
AUTOMATION: {
|
||||||
|
name: "Sol Automation",
|
||||||
|
description:
|
||||||
|
"A workflow or rule in Sol that automatically performs actions based on specific conditions or triggers, such as recurring tasks, reminders, or integrations with other systems.",
|
||||||
|
},
|
||||||
|
},
|
||||||
[Apps.LINEAR]: {
|
[Apps.LINEAR]: {
|
||||||
ISSUE: {
|
ISSUE: {
|
||||||
name: "Linear Issue",
|
name: "Linear Issue",
|
||||||
@ -102,3 +131,33 @@ export function getNodeTypes(apps: Array<keyof typeof APP_NODE_TYPES>) {
|
|||||||
appSpecific: appSpecificTypes,
|
appSpecific: appSpecificTypes,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function getNodeTypesString(apps: Array<keyof typeof APP_NODE_TYPES>) {
|
||||||
|
let nodeTypesString = "";
|
||||||
|
const generalTypes = Object.entries(GENERAL_NODE_TYPES)
|
||||||
|
.map(([key, value]) => {
|
||||||
|
return `- ${key}: "${value.description}"`;
|
||||||
|
})
|
||||||
|
.join("\n");
|
||||||
|
nodeTypesString += `General Node Types:\n${generalTypes}\n\n`;
|
||||||
|
|
||||||
|
const appSpecificTypes = apps.reduce((acc, appName) => {
|
||||||
|
return {
|
||||||
|
...acc,
|
||||||
|
[appName]: APP_NODE_TYPES[appName],
|
||||||
|
};
|
||||||
|
}, {});
|
||||||
|
|
||||||
|
const appSpecificTypesString = Object.entries(appSpecificTypes)
|
||||||
|
.map(([appName, types]) => {
|
||||||
|
return `For ${appName}:\n${Object.entries(types as any)
|
||||||
|
.map(([key, value]: any) => {
|
||||||
|
return `- ${key}: "${value.description}"`;
|
||||||
|
})
|
||||||
|
.join("\n")}\n\n`;
|
||||||
|
})
|
||||||
|
.join("\n\n");
|
||||||
|
|
||||||
|
nodeTypesString += `App-specific Node Types:\n${appSpecificTypesString}`;
|
||||||
|
return nodeTypesString;
|
||||||
|
}
|
||||||
|
|||||||
@ -10,6 +10,7 @@ export enum EpisodeType {
|
|||||||
export interface EpisodicNode {
|
export interface EpisodicNode {
|
||||||
uuid: string;
|
uuid: string;
|
||||||
content: string;
|
content: string;
|
||||||
|
originalContent: string;
|
||||||
contentEmbedding?: number[];
|
contentEmbedding?: number[];
|
||||||
type: string;
|
type: string;
|
||||||
source: string;
|
source: string;
|
||||||
@ -82,70 +83,3 @@ export type AddEpisodeResult = {
|
|||||||
statementsCreated: number;
|
statementsCreated: number;
|
||||||
processingTimeMs: number;
|
processingTimeMs: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const entityTypes = {
|
|
||||||
general: {
|
|
||||||
PERSON: {
|
|
||||||
type: "PERSON",
|
|
||||||
description: "Any named individual mentioned or referenced.",
|
|
||||||
},
|
|
||||||
APP: {
|
|
||||||
type: "APP",
|
|
||||||
description: "Any third-party service or platform used by the user.",
|
|
||||||
},
|
|
||||||
SOL_AUTOMATION: {
|
|
||||||
type: "SOL_AUTOMATION",
|
|
||||||
description: "User workflows or flows combining triggers and actions.",
|
|
||||||
},
|
|
||||||
SOL_PREFERENCE: {
|
|
||||||
type: "SOL_PREFERENCE",
|
|
||||||
description: "User-stated intent, setting, or configuration like format, timezone, etc.",
|
|
||||||
},
|
|
||||||
COMMAND: {
|
|
||||||
type: "COMMAND",
|
|
||||||
description: "Trigger phrase mapped to an internal action, often starts with / or !",
|
|
||||||
},
|
|
||||||
TASK: {
|
|
||||||
type: "TASK",
|
|
||||||
description: "User-stated or inferred goal; may link to Person or App.",
|
|
||||||
},
|
|
||||||
EVENT: {
|
|
||||||
type: "EVENT",
|
|
||||||
description: "Time-based mention; supports parsing of phrases like 'next week', 'tomorrow'.",
|
|
||||||
},
|
|
||||||
LABEL: {
|
|
||||||
type: "LABEL",
|
|
||||||
description: "Optional categorization tag for organization or filtering.",
|
|
||||||
},
|
|
||||||
OBJECT: {
|
|
||||||
type: "OBJECT",
|
|
||||||
description: "Named non-person objects in the user's world (e.g., Projector, Car).",
|
|
||||||
},
|
|
||||||
TEAM: {
|
|
||||||
type: "TEAM",
|
|
||||||
description: "User-defined group of people, useful for permissions or targeting.",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
app_specific: {
|
|
||||||
SLACK_CHANNEL: {
|
|
||||||
type: "SLACK_CHANNEL",
|
|
||||||
description: "Slack channel where automations or communications happen.",
|
|
||||||
},
|
|
||||||
SLACK_USER: {
|
|
||||||
type: "SLACK_USER",
|
|
||||||
description: "A user in Slack, can be tagged or messaged.",
|
|
||||||
},
|
|
||||||
GMAIL_THREAD: {
|
|
||||||
type: "GMAIL_THREAD",
|
|
||||||
description: "An email conversation thread in Gmail.",
|
|
||||||
},
|
|
||||||
NOTION_PAGE: {
|
|
||||||
type: "NOTION_PAGE",
|
|
||||||
description: "A page in Notion workspace.",
|
|
||||||
},
|
|
||||||
CALENDAR_EVENT: {
|
|
||||||
type: "CALENDAR_EVENT",
|
|
||||||
description: "Event from user's calendar (Google, Outlook, etc.).",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user