fix: update Cypher queries to use vector indexes

This commit is contained in:
Manoj K 2025-08-26 14:58:48 +05:30 committed by Harshith Mullapudi
parent 7d00e62ba8
commit 7205b909a4
5 changed files with 69 additions and 73 deletions

View File

@ -312,6 +312,18 @@ const initializeSchema = async () => {
await runQuery(
"CREATE INDEX entity_name IF NOT EXISTS FOR (n:Entity) ON (n.name)",
);
await runQuery(
"CREATE INDEX entity_uuid IF NOT EXISTS FOR (n:Entity) ON (n.uuid)",
);
await runQuery(
"CREATE INDEX entity_type IF NOT EXISTS FOR (n:Entity) ON (n.type)",
);
await runQuery(
"CREATE INDEX entity_user_id IF NOT EXISTS FOR (n:Entity) ON (n.userId)",
);
await runQuery(
"CREATE INDEX statement_user_id IF NOT EXISTS FOR (n:Statement) ON (n.userId)",
);
await runQuery(
"CREATE INDEX cluster_user_id IF NOT EXISTS FOR (n:Cluster) ON (n.userId)",
);
@ -322,17 +334,17 @@ const initializeSchema = async () => {
// Create vector indexes for semantic search (CREATE VECTOR INDEX requires Neo4j 5.15+; the vector.hnsw.* options require 5.18+)
await runQuery(`
CREATE VECTOR INDEX entity_embedding IF NOT EXISTS FOR (n:Entity) ON n.nameEmbedding
OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
`);
await runQuery(`
CREATE VECTOR INDEX statement_embedding IF NOT EXISTS FOR (n:Statement) ON n.factEmbedding
OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
`);
await runQuery(`
CREATE VECTOR INDEX episode_embedding IF NOT EXISTS FOR (n:Episode) ON n.contentEmbedding
OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
`);
// Create fulltext indexes for BM25 search
@ -348,7 +360,7 @@ const initializeSchema = async () => {
await runQuery(`
CREATE FULLTEXT INDEX entity_name_index IF NOT EXISTS
FOR (n:Entity) ON EACH [n.name, n.description]
FOR (n:Entity) ON EACH [n.name]
OPTIONS {
indexConfig: {
\`fulltext.analyzer\`: 'english'

View File

@ -83,16 +83,15 @@ export async function findSimilarEntities(params: {
userId: string;
}): Promise<EntityNode[]> {
const query = `
MATCH (entity:Entity)
WHERE entity.nameEmbedding IS NOT NULL
WITH entity, vector.similarity.cosine($queryEmbedding, entity.nameEmbedding) AS score
CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
YIELD node AS entity, score
WHERE score >= $threshold
AND entity.userId = $userId
RETURN entity, score
ORDER BY score DESC
`;
const result = await runQuery(query, params);
const result = await runQuery(query, { ...params, topK: params.limit });
return result.map((record) => {
const entity = record.get("entity").properties;
@ -118,9 +117,8 @@ export async function findSimilarEntitiesWithSameType(params: {
userId: string;
}): Promise<EntityNode[]> {
const query = `
MATCH (entity:Entity)
WHERE entity.nameEmbedding IS NOT NULL
WITH entity, vector.similarity.cosine($queryEmbedding, entity.nameEmbedding) AS score
CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
YIELD node AS entity, score
WHERE score >= $threshold
AND entity.userId = $userId
AND entity.type = $entityType
@ -128,7 +126,7 @@ export async function findSimilarEntitiesWithSameType(params: {
ORDER BY score DESC
`;
const result = await runQuery(query, params);
const result = await runQuery(query, { ...params, topK: params.limit });
return result.map((record) => {
const entity = record.get("entity").properties;

View File

@ -137,16 +137,10 @@ export async function searchEpisodesByEmbedding(params: {
minSimilarity?: number;
}) {
const query = `
MATCH (episode:Episode)
CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
YIELD node AS episode, score
WHERE episode.userId = $userId
AND episode.contentEmbedding IS NOT NULL
WITH episode,
CASE
WHEN size(episode.contentEmbedding) = size($embedding)
THEN vector.similarity.cosine($embedding, episode.contentEmbedding)
ELSE 0
END AS score
WHERE score >= $minSimilarity
AND score >= $minSimilarity
RETURN episode, score
ORDER BY score DESC`;
@ -154,6 +148,7 @@ export async function searchEpisodesByEmbedding(params: {
embedding: params.embedding,
minSimilarity: params.minSimilarity,
userId: params.userId,
topK: 100,
});
if (!result || result.length === 0) {
@ -283,15 +278,10 @@ export async function getRelatedEpisodesEntities(params: {
minSimilarity?: number;
}) {
const query = `
MATCH (episode:Episode {userId: $userId})
WHERE episode.contentEmbedding IS NOT NULL
WITH episode,
CASE
WHEN size(episode.contentEmbedding) = size($embedding)
THEN vector.similarity.cosine($embedding, episode.contentEmbedding)
ELSE 0
END AS score
WHERE score >= $minSimilarity
CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
YIELD node AS episode, score
WHERE episode.userId = $userId
AND score >= $minSimilarity
OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity)
WHERE entity IS NOT NULL
RETURN DISTINCT entity`;
@ -300,6 +290,7 @@ export async function getRelatedEpisodesEntities(params: {
embedding: params.embedding,
minSimilarity: params.minSimilarity,
userId: params.userId,
topK: params.limit || 100,
});
return result

View File

@ -110,11 +110,10 @@ export async function findContradictoryStatements({
userId: string;
}): Promise<StatementNode[]> {
const query = `
MATCH (statement:Statement)
MATCH (subject:Entity {uuid: $subjectId}), (predicate:Entity {uuid: $predicateId})
MATCH (subject)<-[:HAS_SUBJECT]-(statement:Statement)-[:HAS_PREDICATE]->(predicate)
WHERE statement.userId = $userId
AND statement.invalidAt IS NULL
MATCH (subject:Entity)<-[:HAS_SUBJECT]-(statement)-[:HAS_PREDICATE]->(predicate:Entity)
WHERE subject.uuid = $subjectId AND predicate.uuid = $predicateId
RETURN statement
`;
@ -157,18 +156,21 @@ export async function findStatementsWithSameSubjectObject({
userId: string;
}): Promise<StatementNode[]> {
const query = `
MATCH (statement:Statement)
MATCH (subject:Entity {uuid: $subjectId}), (object:Entity {uuid: $objectId})
MATCH (subject)<-[:HAS_SUBJECT]-(statement:Statement)-[:HAS_OBJECT]->(object)
MATCH (statement)-[:HAS_PREDICATE]->(predicate:Entity)
WHERE statement.userId = $userId
AND statement.invalidAt IS NULL
MATCH (subject:Entity)<-[:HAS_SUBJECT]-(statement)-[:HAS_PREDICATE]->(predicate:Entity)
MATCH (statement)-[:HAS_OBJECT]->(object:Entity)
WHERE subject.uuid = $subjectId
AND object.uuid = $objectId
${excludePredicateId ? "AND predicate.uuid <> $excludePredicateId" : ""}
RETURN statement
`;
const params = { subjectId, objectId, userId, ...(excludePredicateId && { excludePredicateId }) };
const params = {
subjectId,
objectId,
userId,
...(excludePredicateId && { excludePredicateId }),
};
const result = await runQuery(query, params);
if (!result || result.length === 0) {
@ -207,13 +209,12 @@ export async function findSimilarStatements({
userId: string;
}): Promise<StatementNode[]> {
const query = `
MATCH (statement:Statement)
CALL db.index.vector.queryNodes('statement_embedding', $topK, $factEmbedding)
YIELD node AS statement, score
WHERE statement.userId = $userId
AND statement.invalidAt IS NULL
AND statement.factEmbedding IS NOT NULL
AND statement.invalidAt IS NULL
AND score >= $threshold
${excludeIds.length > 0 ? "AND NOT statement.uuid IN $excludeIds" : ""}
WITH statement, vector.similarity.cosine($factEmbedding, statement.factEmbedding) AS score
WHERE score >= $threshold
RETURN statement, score
ORDER BY score DESC
`;
@ -223,6 +224,7 @@ export async function findSimilarStatements({
threshold,
excludeIds,
userId,
topK: 100,
});
if (!result || result.length === 0) {
@ -396,17 +398,11 @@ export async function searchStatementsByEmbedding(params: {
minSimilarity?: number;
}) {
const query = `
MATCH (statement:Statement)
CALL db.index.vector.queryNodes('statement_embedding', $topK, $embedding)
YIELD node AS statement, score
WHERE statement.userId = $userId
AND statement.invalidAt IS NULL
AND statement.factEmbedding IS NOT NULL
WITH statement,
CASE
WHEN size(statement.factEmbedding) = size($embedding)
THEN vector.similarity.cosine($embedding, statement.factEmbedding)
ELSE 0
END AS score
WHERE score >= $minSimilarity
AND statement.invalidAt IS NULL
AND score >= $minSimilarity
RETURN statement, score
ORDER BY score DESC
`;
@ -416,6 +412,7 @@ export async function searchStatementsByEmbedding(params: {
minSimilarity: params.minSimilarity,
limit: params.limit,
userId: params.userId,
topK: params.limit || 100,
});
if (!result || result.length === 0) {

View File

@ -131,23 +131,23 @@ export async function performVectorSearch(
// 1. Search for similar statements using Neo4j vector search with provenance count
const cypher = `
MATCH (s:Statement)
WHERE
(s.userId = $userId)
${timeframeCondition}
${spaceCondition}
WITH s, vector.similarity.cosine(s.factEmbedding, $embedding) AS score
WHERE score > 0.7
OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s)
WITH s, score, count(episode) as provenanceCount
RETURN s, score, provenanceCount
ORDER BY score DESC
`;
CALL db.index.vector.queryNodes('statement_embedding', $topk, $embedding)
YIELD node AS s, score
WHERE s.userId = $userId
AND score >= 0.7
${timeframeCondition.replace("AND", "AND").replace("WHERE", "AND")}
${spaceCondition}
OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s)
WITH s, score, count(episode) as provenanceCount
RETURN s, score, provenanceCount
ORDER BY score DESC
`;
const params = {
embedding: query,
userId,
validAt: options.endTime.toISOString(),
topk: options.limit || 100,
...(options.startTime && { startTime: options.startTime.toISOString() }),
...(options.spaceIds.length > 0 && { spaceIds: options.spaceIds }),
};
@ -281,15 +281,13 @@ export async function extractEntitiesFromQuery(
try {
// Use vector similarity to find relevant entities
const cypher = `
// Match entities using vector similarity on name embeddings
MATCH (e:Entity)
WHERE e.nameEmbedding IS NOT NULL
AND e.userId = $userId
WITH e, vector.similarity.cosine(e.nameEmbedding, $embedding) AS score
WHERE score > 0.7
// Match entities using vector index on name embeddings
CALL db.index.vector.queryNodes('entity_embedding', 3, $embedding)
YIELD node AS e, score
WHERE e.userId = $userId
AND score > 0.7
RETURN e
ORDER BY score DESC
LIMIT 3
`;
const params = {