mirror of
https://github.com/eliasstepanik/core.git
synced 2026-01-11 09:08:28 +00:00
fix: cyphers to use vector indexes
This commit is contained in:
parent
7d00e62ba8
commit
7205b909a4
@ -312,6 +312,18 @@ const initializeSchema = async () => {
|
||||
await runQuery(
|
||||
"CREATE INDEX entity_name IF NOT EXISTS FOR (n:Entity) ON (n.name)",
|
||||
);
|
||||
await runQuery(
|
||||
"CREATE INDEX entity_uuid IF NOT EXISTS FOR (n:Entity) ON (n.uuid)",
|
||||
);
|
||||
await runQuery(
|
||||
"CREATE INDEX entity_type IF NOT EXISTS FOR (n:Entity) ON (n.type)",
|
||||
);
|
||||
await runQuery(
|
||||
"CREATE INDEX entity_user_id IF NOT EXISTS FOR (n:Entity) ON (n.userId)",
|
||||
);
|
||||
await runQuery(
|
||||
"CREATE INDEX statement_user_id IF NOT EXISTS FOR (n:Statement) ON (n.userId)",
|
||||
);
|
||||
await runQuery(
|
||||
"CREATE INDEX cluster_user_id IF NOT EXISTS FOR (n:Cluster) ON (n.userId)",
|
||||
);
|
||||
@ -322,17 +334,17 @@ const initializeSchema = async () => {
|
||||
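// Create vector indexes for semantic search.
// NOTE: Neo4j 5.0 does not support vector indexes; they were introduced in a later 5.x release (5.11),
// and the CREATE VECTOR INDEX command / HNSW options may need a newer one still — TODO confirm the minimum version.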
|
||||
await runQuery(`
|
||||
CREATE VECTOR INDEX entity_embedding IF NOT EXISTS FOR (n:Entity) ON n.nameEmbedding
|
||||
OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
|
||||
OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
|
||||
`);
|
||||
|
||||
await runQuery(`
|
||||
CREATE VECTOR INDEX statement_embedding IF NOT EXISTS FOR (n:Statement) ON n.factEmbedding
|
||||
OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
|
||||
OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
|
||||
`);
|
||||
|
||||
await runQuery(`
|
||||
CREATE VECTOR INDEX episode_embedding IF NOT EXISTS FOR (n:Episode) ON n.contentEmbedding
|
||||
OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
|
||||
OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
|
||||
`);
|
||||
|
||||
// Create fulltext indexes for BM25 search
|
||||
@ -348,7 +360,7 @@ const initializeSchema = async () => {
|
||||
|
||||
await runQuery(`
|
||||
CREATE FULLTEXT INDEX entity_name_index IF NOT EXISTS
|
||||
FOR (n:Entity) ON EACH [n.name, n.description]
|
||||
FOR (n:Entity) ON EACH [n.name]
|
||||
OPTIONS {
|
||||
indexConfig: {
|
||||
\`fulltext.analyzer\`: 'english'
|
||||
|
||||
@ -83,16 +83,15 @@ export async function findSimilarEntities(params: {
|
||||
userId: string;
|
||||
}): Promise<EntityNode[]> {
|
||||
const query = `
|
||||
MATCH (entity:Entity)
|
||||
WHERE entity.nameEmbedding IS NOT NULL
|
||||
WITH entity, vector.similarity.cosine($queryEmbedding, entity.nameEmbedding) AS score
|
||||
CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
|
||||
YIELD node AS entity, score
|
||||
WHERE score >= $threshold
|
||||
AND entity.userId = $userId
|
||||
RETURN entity, score
|
||||
ORDER BY score DESC
|
||||
`;
|
||||
|
||||
const result = await runQuery(query, params);
|
||||
const result = await runQuery(query, { ...params, topK: params.limit });
|
||||
return result.map((record) => {
|
||||
const entity = record.get("entity").properties;
|
||||
|
||||
@ -118,9 +117,8 @@ export async function findSimilarEntitiesWithSameType(params: {
|
||||
userId: string;
|
||||
}): Promise<EntityNode[]> {
|
||||
const query = `
|
||||
MATCH (entity:Entity)
|
||||
WHERE entity.nameEmbedding IS NOT NULL
|
||||
WITH entity, vector.similarity.cosine($queryEmbedding, entity.nameEmbedding) AS score
|
||||
CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
|
||||
YIELD node AS entity, score
|
||||
WHERE score >= $threshold
|
||||
AND entity.userId = $userId
|
||||
AND entity.type = $entityType
|
||||
@ -128,7 +126,7 @@ export async function findSimilarEntitiesWithSameType(params: {
|
||||
ORDER BY score DESC
|
||||
`;
|
||||
|
||||
const result = await runQuery(query, params);
|
||||
const result = await runQuery(query, { ...params, topK: params.limit });
|
||||
return result.map((record) => {
|
||||
const entity = record.get("entity").properties;
|
||||
|
||||
|
||||
@ -137,16 +137,10 @@ export async function searchEpisodesByEmbedding(params: {
|
||||
minSimilarity?: number;
|
||||
}) {
|
||||
const query = `
|
||||
MATCH (episode:Episode)
|
||||
CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
|
||||
YIELD node AS episode, score
|
||||
WHERE episode.userId = $userId
|
||||
AND episode.contentEmbedding IS NOT NULL
|
||||
WITH episode,
|
||||
CASE
|
||||
WHEN size(episode.contentEmbedding) = size($embedding)
|
||||
THEN vector.similarity.cosine($embedding, episode.contentEmbedding)
|
||||
ELSE 0
|
||||
END AS score
|
||||
WHERE score >= $minSimilarity
|
||||
AND score >= $minSimilarity
|
||||
RETURN episode, score
|
||||
ORDER BY score DESC`;
|
||||
|
||||
@ -154,6 +148,7 @@ export async function searchEpisodesByEmbedding(params: {
|
||||
embedding: params.embedding,
|
||||
minSimilarity: params.minSimilarity,
|
||||
userId: params.userId,
|
||||
topK: 100,
|
||||
});
|
||||
|
||||
if (!result || result.length === 0) {
|
||||
@ -283,15 +278,10 @@ export async function getRelatedEpisodesEntities(params: {
|
||||
minSimilarity?: number;
|
||||
}) {
|
||||
const query = `
|
||||
MATCH (episode:Episode {userId: $userId})
|
||||
WHERE episode.contentEmbedding IS NOT NULL
|
||||
WITH episode,
|
||||
CASE
|
||||
WHEN size(episode.contentEmbedding) = size($embedding)
|
||||
THEN vector.similarity.cosine($embedding, episode.contentEmbedding)
|
||||
ELSE 0
|
||||
END AS score
|
||||
WHERE score >= $minSimilarity
|
||||
CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
|
||||
YIELD node AS episode, score
|
||||
WHERE episode.userId = $userId
|
||||
AND score >= $minSimilarity
|
||||
OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity)
|
||||
WHERE entity IS NOT NULL
|
||||
RETURN DISTINCT entity`;
|
||||
@ -300,6 +290,7 @@ export async function getRelatedEpisodesEntities(params: {
|
||||
embedding: params.embedding,
|
||||
minSimilarity: params.minSimilarity,
|
||||
userId: params.userId,
|
||||
topK: params.limit || 100,
|
||||
});
|
||||
|
||||
return result
|
||||
|
||||
@ -110,11 +110,10 @@ export async function findContradictoryStatements({
|
||||
userId: string;
|
||||
}): Promise<StatementNode[]> {
|
||||
const query = `
|
||||
MATCH (statement:Statement)
|
||||
MATCH (subject:Entity {uuid: $subjectId}), (predicate:Entity {uuid: $predicateId})
|
||||
MATCH (subject)<-[:HAS_SUBJECT]-(statement:Statement)-[:HAS_PREDICATE]->(predicate)
|
||||
WHERE statement.userId = $userId
|
||||
AND statement.invalidAt IS NULL
|
||||
MATCH (subject:Entity)<-[:HAS_SUBJECT]-(statement)-[:HAS_PREDICATE]->(predicate:Entity)
|
||||
WHERE subject.uuid = $subjectId AND predicate.uuid = $predicateId
|
||||
RETURN statement
|
||||
`;
|
||||
|
||||
@ -157,18 +156,21 @@ export async function findStatementsWithSameSubjectObject({
|
||||
userId: string;
|
||||
}): Promise<StatementNode[]> {
|
||||
const query = `
|
||||
MATCH (statement:Statement)
|
||||
MATCH (subject:Entity {uuid: $subjectId}), (object:Entity {uuid: $objectId})
|
||||
MATCH (subject)<-[:HAS_SUBJECT]-(statement:Statement)-[:HAS_OBJECT]->(object)
|
||||
MATCH (statement)-[:HAS_PREDICATE]->(predicate:Entity)
|
||||
WHERE statement.userId = $userId
|
||||
AND statement.invalidAt IS NULL
|
||||
MATCH (subject:Entity)<-[:HAS_SUBJECT]-(statement)-[:HAS_PREDICATE]->(predicate:Entity)
|
||||
MATCH (statement)-[:HAS_OBJECT]->(object:Entity)
|
||||
WHERE subject.uuid = $subjectId
|
||||
AND object.uuid = $objectId
|
||||
${excludePredicateId ? "AND predicate.uuid <> $excludePredicateId" : ""}
|
||||
RETURN statement
|
||||
`;
|
||||
|
||||
const params = { subjectId, objectId, userId, ...(excludePredicateId && { excludePredicateId }) };
|
||||
const params = {
|
||||
subjectId,
|
||||
objectId,
|
||||
userId,
|
||||
...(excludePredicateId && { excludePredicateId }),
|
||||
};
|
||||
const result = await runQuery(query, params);
|
||||
|
||||
if (!result || result.length === 0) {
|
||||
@ -207,13 +209,12 @@ export async function findSimilarStatements({
|
||||
userId: string;
|
||||
}): Promise<StatementNode[]> {
|
||||
const query = `
|
||||
MATCH (statement:Statement)
|
||||
CALL db.index.vector.queryNodes('statement_embedding', $topK, $factEmbedding)
|
||||
YIELD node AS statement, score
|
||||
WHERE statement.userId = $userId
|
||||
AND statement.invalidAt IS NULL
|
||||
AND statement.factEmbedding IS NOT NULL
|
||||
AND statement.invalidAt IS NULL
|
||||
AND score >= $threshold
|
||||
${excludeIds.length > 0 ? "AND NOT statement.uuid IN $excludeIds" : ""}
|
||||
WITH statement, vector.similarity.cosine($factEmbedding, statement.factEmbedding) AS score
|
||||
WHERE score >= $threshold
|
||||
RETURN statement, score
|
||||
ORDER BY score DESC
|
||||
`;
|
||||
@ -223,6 +224,7 @@ export async function findSimilarStatements({
|
||||
threshold,
|
||||
excludeIds,
|
||||
userId,
|
||||
topK: 100,
|
||||
});
|
||||
|
||||
if (!result || result.length === 0) {
|
||||
@ -396,17 +398,11 @@ export async function searchStatementsByEmbedding(params: {
|
||||
minSimilarity?: number;
|
||||
}) {
|
||||
const query = `
|
||||
MATCH (statement:Statement)
|
||||
CALL db.index.vector.queryNodes('statement_embedding', $topK, $embedding)
|
||||
YIELD node AS statement, score
|
||||
WHERE statement.userId = $userId
|
||||
AND statement.invalidAt IS NULL
|
||||
AND statement.factEmbedding IS NOT NULL
|
||||
WITH statement,
|
||||
CASE
|
||||
WHEN size(statement.factEmbedding) = size($embedding)
|
||||
THEN vector.similarity.cosine($embedding, statement.factEmbedding)
|
||||
ELSE 0
|
||||
END AS score
|
||||
WHERE score >= $minSimilarity
|
||||
AND statement.invalidAt IS NULL
|
||||
AND score >= $minSimilarity
|
||||
RETURN statement, score
|
||||
ORDER BY score DESC
|
||||
`;
|
||||
@ -416,6 +412,7 @@ export async function searchStatementsByEmbedding(params: {
|
||||
minSimilarity: params.minSimilarity,
|
||||
limit: params.limit,
|
||||
userId: params.userId,
|
||||
topK: params.limit || 100,
|
||||
});
|
||||
|
||||
if (!result || result.length === 0) {
|
||||
|
||||
@ -131,23 +131,23 @@ export async function performVectorSearch(
|
||||
|
||||
// 1. Search for similar statements using Neo4j vector search with provenance count
|
||||
const cypher = `
|
||||
MATCH (s:Statement)
|
||||
WHERE
|
||||
(s.userId = $userId)
|
||||
${timeframeCondition}
|
||||
${spaceCondition}
|
||||
WITH s, vector.similarity.cosine(s.factEmbedding, $embedding) AS score
|
||||
WHERE score > 0.7
|
||||
OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s)
|
||||
WITH s, score, count(episode) as provenanceCount
|
||||
RETURN s, score, provenanceCount
|
||||
ORDER BY score DESC
|
||||
`;
|
||||
CALL db.index.vector.queryNodes('statement_embedding', $topk, $embedding)
|
||||
YIELD node AS s, score
|
||||
WHERE s.userId = $userId
|
||||
AND score >= 0.7
|
||||
${timeframeCondition.replace("AND", "AND").replace("WHERE", "AND")}
|
||||
${spaceCondition}
|
||||
OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s)
|
||||
WITH s, score, count(episode) as provenanceCount
|
||||
RETURN s, score, provenanceCount
|
||||
ORDER BY score DESC
|
||||
`;
|
||||
|
||||
const params = {
|
||||
embedding: query,
|
||||
userId,
|
||||
validAt: options.endTime.toISOString(),
|
||||
topk: options.limit || 100,
|
||||
...(options.startTime && { startTime: options.startTime.toISOString() }),
|
||||
...(options.spaceIds.length > 0 && { spaceIds: options.spaceIds }),
|
||||
};
|
||||
@ -281,15 +281,13 @@ export async function extractEntitiesFromQuery(
|
||||
try {
|
||||
// Use vector similarity to find relevant entities
|
||||
const cypher = `
|
||||
// Match entities using vector similarity on name embeddings
|
||||
MATCH (e:Entity)
|
||||
WHERE e.nameEmbedding IS NOT NULL
|
||||
AND e.userId = $userId
|
||||
WITH e, vector.similarity.cosine(e.nameEmbedding, $embedding) AS score
|
||||
WHERE score > 0.7
|
||||
// Match entities using vector index on name embeddings
|
||||
CALL db.index.vector.queryNodes('entity_embedding', 3, $embedding)
|
||||
YIELD node AS e, score
|
||||
WHERE e.userId = $userId
|
||||
AND score > 0.7
|
||||
RETURN e
|
||||
ORDER BY score DESC
|
||||
LIMIT 3
|
||||
`;
|
||||
|
||||
const params = {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user