mirror of
https://github.com/eliasstepanik/core.git
synced 2026-01-11 18:18:27 +00:00
fix: cyphers to use vector indexes
This commit is contained in:
parent
7d00e62ba8
commit
7205b909a4
@ -312,6 +312,18 @@ const initializeSchema = async () => {
|
|||||||
await runQuery(
|
await runQuery(
|
||||||
"CREATE INDEX entity_name IF NOT EXISTS FOR (n:Entity) ON (n.name)",
|
"CREATE INDEX entity_name IF NOT EXISTS FOR (n:Entity) ON (n.name)",
|
||||||
);
|
);
|
||||||
|
await runQuery(
|
||||||
|
"CREATE INDEX entity_uuid IF NOT EXISTS FOR (n:Entity) ON (n.uuid)",
|
||||||
|
);
|
||||||
|
await runQuery(
|
||||||
|
"CREATE INDEX entity_type IF NOT EXISTS FOR (n:Entity) ON (n.type)",
|
||||||
|
);
|
||||||
|
await runQuery(
|
||||||
|
"CREATE INDEX entity_user_id IF NOT EXISTS FOR (n:Entity) ON (n.userId)",
|
||||||
|
);
|
||||||
|
await runQuery(
|
||||||
|
"CREATE INDEX statement_user_id IF NOT EXISTS FOR (n:Statement) ON (n.userId)",
|
||||||
|
);
|
||||||
await runQuery(
|
await runQuery(
|
||||||
"CREATE INDEX cluster_user_id IF NOT EXISTS FOR (n:Cluster) ON (n.userId)",
|
"CREATE INDEX cluster_user_id IF NOT EXISTS FOR (n:Cluster) ON (n.userId)",
|
||||||
);
|
);
|
||||||
@ -322,17 +334,17 @@ const initializeSchema = async () => {
|
|||||||
// Create vector indexes for semantic search (if using Neo4j 5.0+)
|
// Create vector indexes for semantic search (if using Neo4j 5.0+)
|
||||||
await runQuery(`
|
await runQuery(`
|
||||||
CREATE VECTOR INDEX entity_embedding IF NOT EXISTS FOR (n:Entity) ON n.nameEmbedding
|
CREATE VECTOR INDEX entity_embedding IF NOT EXISTS FOR (n:Entity) ON n.nameEmbedding
|
||||||
OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
|
OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
|
||||||
`);
|
`);
|
||||||
|
|
||||||
await runQuery(`
|
await runQuery(`
|
||||||
CREATE VECTOR INDEX statement_embedding IF NOT EXISTS FOR (n:Statement) ON n.factEmbedding
|
CREATE VECTOR INDEX statement_embedding IF NOT EXISTS FOR (n:Statement) ON n.factEmbedding
|
||||||
OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
|
OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
|
||||||
`);
|
`);
|
||||||
|
|
||||||
await runQuery(`
|
await runQuery(`
|
||||||
CREATE VECTOR INDEX episode_embedding IF NOT EXISTS FOR (n:Episode) ON n.contentEmbedding
|
CREATE VECTOR INDEX episode_embedding IF NOT EXISTS FOR (n:Episode) ON n.contentEmbedding
|
||||||
OPTIONS {indexConfig: {\`vector.dimensions\`: 1536, \`vector.similarity_function\`: 'cosine'}}
|
OPTIONS {indexConfig: {\`vector.dimensions\`: 1024, \`vector.similarity_function\`: 'cosine', \`vector.hnsw.ef_construction\`: 400, \`vector.hnsw.m\`: 32}}
|
||||||
`);
|
`);
|
||||||
|
|
||||||
// Create fulltext indexes for BM25 search
|
// Create fulltext indexes for BM25 search
|
||||||
@ -348,7 +360,7 @@ const initializeSchema = async () => {
|
|||||||
|
|
||||||
await runQuery(`
|
await runQuery(`
|
||||||
CREATE FULLTEXT INDEX entity_name_index IF NOT EXISTS
|
CREATE FULLTEXT INDEX entity_name_index IF NOT EXISTS
|
||||||
FOR (n:Entity) ON EACH [n.name, n.description]
|
FOR (n:Entity) ON EACH [n.name]
|
||||||
OPTIONS {
|
OPTIONS {
|
||||||
indexConfig: {
|
indexConfig: {
|
||||||
\`fulltext.analyzer\`: 'english'
|
\`fulltext.analyzer\`: 'english'
|
||||||
|
|||||||
@ -83,16 +83,15 @@ export async function findSimilarEntities(params: {
|
|||||||
userId: string;
|
userId: string;
|
||||||
}): Promise<EntityNode[]> {
|
}): Promise<EntityNode[]> {
|
||||||
const query = `
|
const query = `
|
||||||
MATCH (entity:Entity)
|
CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
|
||||||
WHERE entity.nameEmbedding IS NOT NULL
|
YIELD node AS entity, score
|
||||||
WITH entity, vector.similarity.cosine($queryEmbedding, entity.nameEmbedding) AS score
|
|
||||||
WHERE score >= $threshold
|
WHERE score >= $threshold
|
||||||
AND entity.userId = $userId
|
AND entity.userId = $userId
|
||||||
RETURN entity, score
|
RETURN entity, score
|
||||||
ORDER BY score DESC
|
ORDER BY score DESC
|
||||||
`;
|
`;
|
||||||
|
|
||||||
const result = await runQuery(query, params);
|
const result = await runQuery(query, { ...params, topK: params.limit });
|
||||||
return result.map((record) => {
|
return result.map((record) => {
|
||||||
const entity = record.get("entity").properties;
|
const entity = record.get("entity").properties;
|
||||||
|
|
||||||
@ -118,9 +117,8 @@ export async function findSimilarEntitiesWithSameType(params: {
|
|||||||
userId: string;
|
userId: string;
|
||||||
}): Promise<EntityNode[]> {
|
}): Promise<EntityNode[]> {
|
||||||
const query = `
|
const query = `
|
||||||
MATCH (entity:Entity)
|
CALL db.index.vector.queryNodes('entity_embedding', $topK, $queryEmbedding)
|
||||||
WHERE entity.nameEmbedding IS NOT NULL
|
YIELD node AS entity, score
|
||||||
WITH entity, vector.similarity.cosine($queryEmbedding, entity.nameEmbedding) AS score
|
|
||||||
WHERE score >= $threshold
|
WHERE score >= $threshold
|
||||||
AND entity.userId = $userId
|
AND entity.userId = $userId
|
||||||
AND entity.type = $entityType
|
AND entity.type = $entityType
|
||||||
@ -128,7 +126,7 @@ export async function findSimilarEntitiesWithSameType(params: {
|
|||||||
ORDER BY score DESC
|
ORDER BY score DESC
|
||||||
`;
|
`;
|
||||||
|
|
||||||
const result = await runQuery(query, params);
|
const result = await runQuery(query, { ...params, topK: params.limit });
|
||||||
return result.map((record) => {
|
return result.map((record) => {
|
||||||
const entity = record.get("entity").properties;
|
const entity = record.get("entity").properties;
|
||||||
|
|
||||||
|
|||||||
@ -137,16 +137,10 @@ export async function searchEpisodesByEmbedding(params: {
|
|||||||
minSimilarity?: number;
|
minSimilarity?: number;
|
||||||
}) {
|
}) {
|
||||||
const query = `
|
const query = `
|
||||||
MATCH (episode:Episode)
|
CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
|
||||||
|
YIELD node AS episode, score
|
||||||
WHERE episode.userId = $userId
|
WHERE episode.userId = $userId
|
||||||
AND episode.contentEmbedding IS NOT NULL
|
AND score >= $minSimilarity
|
||||||
WITH episode,
|
|
||||||
CASE
|
|
||||||
WHEN size(episode.contentEmbedding) = size($embedding)
|
|
||||||
THEN vector.similarity.cosine($embedding, episode.contentEmbedding)
|
|
||||||
ELSE 0
|
|
||||||
END AS score
|
|
||||||
WHERE score >= $minSimilarity
|
|
||||||
RETURN episode, score
|
RETURN episode, score
|
||||||
ORDER BY score DESC`;
|
ORDER BY score DESC`;
|
||||||
|
|
||||||
@ -154,6 +148,7 @@ export async function searchEpisodesByEmbedding(params: {
|
|||||||
embedding: params.embedding,
|
embedding: params.embedding,
|
||||||
minSimilarity: params.minSimilarity,
|
minSimilarity: params.minSimilarity,
|
||||||
userId: params.userId,
|
userId: params.userId,
|
||||||
|
topK: 100,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!result || result.length === 0) {
|
if (!result || result.length === 0) {
|
||||||
@ -283,15 +278,10 @@ export async function getRelatedEpisodesEntities(params: {
|
|||||||
minSimilarity?: number;
|
minSimilarity?: number;
|
||||||
}) {
|
}) {
|
||||||
const query = `
|
const query = `
|
||||||
MATCH (episode:Episode {userId: $userId})
|
CALL db.index.vector.queryNodes('episode_embedding', $topK, $embedding)
|
||||||
WHERE episode.contentEmbedding IS NOT NULL
|
YIELD node AS episode, score
|
||||||
WITH episode,
|
WHERE episode.userId = $userId
|
||||||
CASE
|
AND score >= $minSimilarity
|
||||||
WHEN size(episode.contentEmbedding) = size($embedding)
|
|
||||||
THEN vector.similarity.cosine($embedding, episode.contentEmbedding)
|
|
||||||
ELSE 0
|
|
||||||
END AS score
|
|
||||||
WHERE score >= $minSimilarity
|
|
||||||
OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity)
|
OPTIONAL MATCH (episode)-[:HAS_PROVENANCE]->(stmt:Statement)-[:HAS_SUBJECT|HAS_OBJECT]->(entity:Entity)
|
||||||
WHERE entity IS NOT NULL
|
WHERE entity IS NOT NULL
|
||||||
RETURN DISTINCT entity`;
|
RETURN DISTINCT entity`;
|
||||||
@ -300,6 +290,7 @@ export async function getRelatedEpisodesEntities(params: {
|
|||||||
embedding: params.embedding,
|
embedding: params.embedding,
|
||||||
minSimilarity: params.minSimilarity,
|
minSimilarity: params.minSimilarity,
|
||||||
userId: params.userId,
|
userId: params.userId,
|
||||||
|
topK: params.limit || 100,
|
||||||
});
|
});
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|||||||
@ -110,11 +110,10 @@ export async function findContradictoryStatements({
|
|||||||
userId: string;
|
userId: string;
|
||||||
}): Promise<StatementNode[]> {
|
}): Promise<StatementNode[]> {
|
||||||
const query = `
|
const query = `
|
||||||
MATCH (statement:Statement)
|
MATCH (subject:Entity {uuid: $subjectId}), (predicate:Entity {uuid: $predicateId})
|
||||||
|
MATCH (subject)<-[:HAS_SUBJECT]-(statement:Statement)-[:HAS_PREDICATE]->(predicate)
|
||||||
WHERE statement.userId = $userId
|
WHERE statement.userId = $userId
|
||||||
AND statement.invalidAt IS NULL
|
AND statement.invalidAt IS NULL
|
||||||
MATCH (subject:Entity)<-[:HAS_SUBJECT]-(statement)-[:HAS_PREDICATE]->(predicate:Entity)
|
|
||||||
WHERE subject.uuid = $subjectId AND predicate.uuid = $predicateId
|
|
||||||
RETURN statement
|
RETURN statement
|
||||||
`;
|
`;
|
||||||
|
|
||||||
@ -157,18 +156,21 @@ export async function findStatementsWithSameSubjectObject({
|
|||||||
userId: string;
|
userId: string;
|
||||||
}): Promise<StatementNode[]> {
|
}): Promise<StatementNode[]> {
|
||||||
const query = `
|
const query = `
|
||||||
MATCH (statement:Statement)
|
MATCH (subject:Entity {uuid: $subjectId}), (object:Entity {uuid: $objectId})
|
||||||
|
MATCH (subject)<-[:HAS_SUBJECT]-(statement:Statement)-[:HAS_OBJECT]->(object)
|
||||||
|
MATCH (statement)-[:HAS_PREDICATE]->(predicate:Entity)
|
||||||
WHERE statement.userId = $userId
|
WHERE statement.userId = $userId
|
||||||
AND statement.invalidAt IS NULL
|
AND statement.invalidAt IS NULL
|
||||||
MATCH (subject:Entity)<-[:HAS_SUBJECT]-(statement)-[:HAS_PREDICATE]->(predicate:Entity)
|
|
||||||
MATCH (statement)-[:HAS_OBJECT]->(object:Entity)
|
|
||||||
WHERE subject.uuid = $subjectId
|
|
||||||
AND object.uuid = $objectId
|
|
||||||
${excludePredicateId ? "AND predicate.uuid <> $excludePredicateId" : ""}
|
${excludePredicateId ? "AND predicate.uuid <> $excludePredicateId" : ""}
|
||||||
RETURN statement
|
RETURN statement
|
||||||
`;
|
`;
|
||||||
|
|
||||||
const params = { subjectId, objectId, userId, ...(excludePredicateId && { excludePredicateId }) };
|
const params = {
|
||||||
|
subjectId,
|
||||||
|
objectId,
|
||||||
|
userId,
|
||||||
|
...(excludePredicateId && { excludePredicateId }),
|
||||||
|
};
|
||||||
const result = await runQuery(query, params);
|
const result = await runQuery(query, params);
|
||||||
|
|
||||||
if (!result || result.length === 0) {
|
if (!result || result.length === 0) {
|
||||||
@ -207,13 +209,12 @@ export async function findSimilarStatements({
|
|||||||
userId: string;
|
userId: string;
|
||||||
}): Promise<StatementNode[]> {
|
}): Promise<StatementNode[]> {
|
||||||
const query = `
|
const query = `
|
||||||
MATCH (statement:Statement)
|
CALL db.index.vector.queryNodes('statement_embedding', $topK, $factEmbedding)
|
||||||
|
YIELD node AS statement, score
|
||||||
WHERE statement.userId = $userId
|
WHERE statement.userId = $userId
|
||||||
AND statement.invalidAt IS NULL
|
AND statement.invalidAt IS NULL
|
||||||
AND statement.factEmbedding IS NOT NULL
|
AND score >= $threshold
|
||||||
${excludeIds.length > 0 ? "AND NOT statement.uuid IN $excludeIds" : ""}
|
${excludeIds.length > 0 ? "AND NOT statement.uuid IN $excludeIds" : ""}
|
||||||
WITH statement, vector.similarity.cosine($factEmbedding, statement.factEmbedding) AS score
|
|
||||||
WHERE score >= $threshold
|
|
||||||
RETURN statement, score
|
RETURN statement, score
|
||||||
ORDER BY score DESC
|
ORDER BY score DESC
|
||||||
`;
|
`;
|
||||||
@ -223,6 +224,7 @@ export async function findSimilarStatements({
|
|||||||
threshold,
|
threshold,
|
||||||
excludeIds,
|
excludeIds,
|
||||||
userId,
|
userId,
|
||||||
|
topK: 100,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!result || result.length === 0) {
|
if (!result || result.length === 0) {
|
||||||
@ -396,17 +398,11 @@ export async function searchStatementsByEmbedding(params: {
|
|||||||
minSimilarity?: number;
|
minSimilarity?: number;
|
||||||
}) {
|
}) {
|
||||||
const query = `
|
const query = `
|
||||||
MATCH (statement:Statement)
|
CALL db.index.vector.queryNodes('statement_embedding', $topK, $embedding)
|
||||||
|
YIELD node AS statement, score
|
||||||
WHERE statement.userId = $userId
|
WHERE statement.userId = $userId
|
||||||
AND statement.invalidAt IS NULL
|
AND statement.invalidAt IS NULL
|
||||||
AND statement.factEmbedding IS NOT NULL
|
AND score >= $minSimilarity
|
||||||
WITH statement,
|
|
||||||
CASE
|
|
||||||
WHEN size(statement.factEmbedding) = size($embedding)
|
|
||||||
THEN vector.similarity.cosine($embedding, statement.factEmbedding)
|
|
||||||
ELSE 0
|
|
||||||
END AS score
|
|
||||||
WHERE score >= $minSimilarity
|
|
||||||
RETURN statement, score
|
RETURN statement, score
|
||||||
ORDER BY score DESC
|
ORDER BY score DESC
|
||||||
`;
|
`;
|
||||||
@ -416,6 +412,7 @@ export async function searchStatementsByEmbedding(params: {
|
|||||||
minSimilarity: params.minSimilarity,
|
minSimilarity: params.minSimilarity,
|
||||||
limit: params.limit,
|
limit: params.limit,
|
||||||
userId: params.userId,
|
userId: params.userId,
|
||||||
|
topK: params.limit || 100,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!result || result.length === 0) {
|
if (!result || result.length === 0) {
|
||||||
|
|||||||
@ -131,23 +131,23 @@ export async function performVectorSearch(
|
|||||||
|
|
||||||
// 1. Search for similar statements using Neo4j vector search with provenance count
|
// 1. Search for similar statements using Neo4j vector search with provenance count
|
||||||
const cypher = `
|
const cypher = `
|
||||||
MATCH (s:Statement)
|
CALL db.index.vector.queryNodes('statement_embedding', $topk, $embedding)
|
||||||
WHERE
|
YIELD node AS s, score
|
||||||
(s.userId = $userId)
|
WHERE s.userId = $userId
|
||||||
${timeframeCondition}
|
AND score >= 0.7
|
||||||
${spaceCondition}
|
${timeframeCondition.replace("AND", "AND").replace("WHERE", "AND")}
|
||||||
WITH s, vector.similarity.cosine(s.factEmbedding, $embedding) AS score
|
${spaceCondition}
|
||||||
WHERE score > 0.7
|
OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s)
|
||||||
OPTIONAL MATCH (episode:Episode)-[:HAS_PROVENANCE]->(s)
|
WITH s, score, count(episode) as provenanceCount
|
||||||
WITH s, score, count(episode) as provenanceCount
|
RETURN s, score, provenanceCount
|
||||||
RETURN s, score, provenanceCount
|
ORDER BY score DESC
|
||||||
ORDER BY score DESC
|
`;
|
||||||
`;
|
|
||||||
|
|
||||||
const params = {
|
const params = {
|
||||||
embedding: query,
|
embedding: query,
|
||||||
userId,
|
userId,
|
||||||
validAt: options.endTime.toISOString(),
|
validAt: options.endTime.toISOString(),
|
||||||
|
topk: options.limit || 100,
|
||||||
...(options.startTime && { startTime: options.startTime.toISOString() }),
|
...(options.startTime && { startTime: options.startTime.toISOString() }),
|
||||||
...(options.spaceIds.length > 0 && { spaceIds: options.spaceIds }),
|
...(options.spaceIds.length > 0 && { spaceIds: options.spaceIds }),
|
||||||
};
|
};
|
||||||
@ -281,15 +281,13 @@ export async function extractEntitiesFromQuery(
|
|||||||
try {
|
try {
|
||||||
// Use vector similarity to find relevant entities
|
// Use vector similarity to find relevant entities
|
||||||
const cypher = `
|
const cypher = `
|
||||||
// Match entities using vector similarity on name embeddings
|
// Match entities using vector index on name embeddings
|
||||||
MATCH (e:Entity)
|
CALL db.index.vector.queryNodes('entity_embedding', 3, $embedding)
|
||||||
WHERE e.nameEmbedding IS NOT NULL
|
YIELD node AS e, score
|
||||||
AND e.userId = $userId
|
WHERE e.userId = $userId
|
||||||
WITH e, vector.similarity.cosine(e.nameEmbedding, $embedding) AS score
|
AND score > 0.7
|
||||||
WHERE score > 0.7
|
|
||||||
RETURN e
|
RETURN e
|
||||||
ORDER BY score DESC
|
ORDER BY score DESC
|
||||||
LIMIT 3
|
|
||||||
`;
|
`;
|
||||||
|
|
||||||
const params = {
|
const params = {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user