mirror of
https://github.com/eliasstepanik/core.git
synced 2026-01-11 18:08:27 +00:00
feat: add user-scoped queries and admin-only reingestion
This commit is contained in:
parent
7ac663b7ff
commit
39663db274
@ -1,10 +1,10 @@
|
|||||||
import { json } from "@remix-run/node";
|
import { json } from "@remix-run/node";
|
||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server";
|
import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server";
|
||||||
import { addToQueue, type IngestBodyRequest } from "~/lib/ingest.server";
|
import { getUserQueue, type IngestBodyRequest } from "~/lib/ingest.server";
|
||||||
import { prisma } from "~/db.server";
|
import { prisma } from "~/db.server";
|
||||||
import { logger } from "~/services/logger.service";
|
import { logger } from "~/services/logger.service";
|
||||||
import { IngestionStatus } from "@core/database";
|
import { IngestionStatus, type Prisma } from "@core/database";
|
||||||
|
|
||||||
const ReingestionBodyRequest = z.object({
|
const ReingestionBodyRequest = z.object({
|
||||||
userId: z.string().optional(),
|
userId: z.string().optional(),
|
||||||
@ -15,9 +15,8 @@ const ReingestionBodyRequest = z.object({
|
|||||||
type ReingestionRequest = z.infer<typeof ReingestionBodyRequest>;
|
type ReingestionRequest = z.infer<typeof ReingestionBodyRequest>;
|
||||||
|
|
||||||
async function getCompletedIngestionsByUser(userId?: string, spaceId?: string) {
|
async function getCompletedIngestionsByUser(userId?: string, spaceId?: string) {
|
||||||
const whereClause: any = {
|
const whereClause: Prisma.IngestionQueueWhereInput = {
|
||||||
status: IngestionStatus.COMPLETED,
|
status: IngestionStatus.COMPLETED,
|
||||||
deleted: null
|
|
||||||
};
|
};
|
||||||
|
|
||||||
if (userId) {
|
if (userId) {
|
||||||
@ -81,6 +80,7 @@ async function reingestionForUser(userId: string, spaceId?: string, dryRun = fal
|
|||||||
|
|
||||||
// Queue ingestions in temporal order (already sorted by createdAt ASC)
|
// Queue ingestions in temporal order (already sorted by createdAt ASC)
|
||||||
const queuedJobs = [];
|
const queuedJobs = [];
|
||||||
|
const ingestionQueue = getUserQueue(userId);
|
||||||
for (const ingestion of ingestions) {
|
for (const ingestion of ingestions) {
|
||||||
try {
|
try {
|
||||||
// Parse the original data and add reingestion metadata
|
// Parse the original data and add reingestion metadata
|
||||||
@ -96,8 +96,20 @@ async function reingestionForUser(userId: string, spaceId?: string, dryRun = fal
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
const queueResult = await addToQueue(reingestionData, userId);
|
const jobDetails = await ingestionQueue.add(
|
||||||
queuedJobs.push(queueResult);
|
`ingest-user-${userId}`,
|
||||||
|
{
|
||||||
|
queueId: ingestion.id,
|
||||||
|
spaceId: ingestion.spaceId,
|
||||||
|
userId: userId,
|
||||||
|
body: ingestion.data,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
jobId: `${userId}-${Date.now()}`,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
queuedJobs.push({id: jobDetails.id});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(`Failed to queue ingestion ${ingestion.id} for user ${userId}:`, {error});
|
logger.error(`Failed to queue ingestion ${ingestion.id} for user ${userId}:`, {error});
|
||||||
}
|
}
|
||||||
@ -124,6 +136,23 @@ const { action, loader } = createActionApiRoute(
|
|||||||
const { userId, spaceId, dryRun } = body;
|
const { userId, spaceId, dryRun } = body;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
// Check if the user is an admin
|
||||||
|
const user = await prisma.user.findUnique({
|
||||||
|
where: { id: authentication.userId }
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!user || user.admin !== true) {
|
||||||
|
logger.warn("Unauthorized reingest attempt", {
|
||||||
|
requestUserId: authentication.userId,
|
||||||
|
});
|
||||||
|
return json(
|
||||||
|
{
|
||||||
|
success: false,
|
||||||
|
error: "Unauthorized: Only admin users can perform reingestion"
|
||||||
|
},
|
||||||
|
{ status: 403 }
|
||||||
|
);
|
||||||
|
}
|
||||||
if (userId) {
|
if (userId) {
|
||||||
// Reingest for specific user
|
// Reingest for specific user
|
||||||
const result = await reingestionForUser(userId, spaceId, dryRun);
|
const result = await reingestionForUser(userId, spaceId, dryRun);
|
||||||
|
|||||||
@ -101,19 +101,22 @@ export async function saveTriple(triple: Triple): Promise<string> {
|
|||||||
export async function findContradictoryStatements({
|
export async function findContradictoryStatements({
|
||||||
subjectId,
|
subjectId,
|
||||||
predicateId,
|
predicateId,
|
||||||
|
userId,
|
||||||
}: {
|
}: {
|
||||||
subjectId: string;
|
subjectId: string;
|
||||||
predicateId: string;
|
predicateId: string;
|
||||||
|
userId: string;
|
||||||
}): Promise<StatementNode[]> {
|
}): Promise<StatementNode[]> {
|
||||||
const query = `
|
const query = `
|
||||||
MATCH (statement:Statement)
|
MATCH (statement:Statement)
|
||||||
WHERE statement.invalidAt IS NULL
|
WHERE statement.userId = $userId
|
||||||
|
AND statement.invalidAt IS NULL
|
||||||
MATCH (subject:Entity)<-[:HAS_SUBJECT]-(statement)-[:HAS_PREDICATE]->(predicate:Entity)
|
MATCH (subject:Entity)<-[:HAS_SUBJECT]-(statement)-[:HAS_PREDICATE]->(predicate:Entity)
|
||||||
WHERE subject.uuid = $subjectId AND predicate.uuid = $predicateId
|
WHERE subject.uuid = $subjectId AND predicate.uuid = $predicateId
|
||||||
RETURN statement
|
RETURN statement
|
||||||
`;
|
`;
|
||||||
|
|
||||||
const result = await runQuery(query, { subjectId, predicateId });
|
const result = await runQuery(query, { subjectId, predicateId, userId });
|
||||||
|
|
||||||
if (!result || result.length === 0) {
|
if (!result || result.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
@ -143,14 +146,17 @@ export async function findSimilarStatements({
|
|||||||
factEmbedding,
|
factEmbedding,
|
||||||
threshold = 0.85,
|
threshold = 0.85,
|
||||||
excludeIds = [],
|
excludeIds = [],
|
||||||
|
userId,
|
||||||
}: {
|
}: {
|
||||||
factEmbedding: number[];
|
factEmbedding: number[];
|
||||||
threshold?: number;
|
threshold?: number;
|
||||||
excludeIds?: string[];
|
excludeIds?: string[];
|
||||||
|
userId: string;
|
||||||
}): Promise<StatementNode[]> {
|
}): Promise<StatementNode[]> {
|
||||||
const query = `
|
const query = `
|
||||||
MATCH (statement:Statement)
|
MATCH (statement:Statement)
|
||||||
WHERE statement.invalidAt IS NULL
|
WHERE statement.userId = $userId
|
||||||
|
AND statement.invalidAt IS NULL
|
||||||
AND statement.factEmbedding IS NOT NULL
|
AND statement.factEmbedding IS NOT NULL
|
||||||
${excludeIds.length > 0 ? "AND NOT statement.uuid IN $excludeIds" : ""}
|
${excludeIds.length > 0 ? "AND NOT statement.uuid IN $excludeIds" : ""}
|
||||||
WITH statement, vector.similarity.cosine($factEmbedding, statement.factEmbedding) AS score
|
WITH statement, vector.similarity.cosine($factEmbedding, statement.factEmbedding) AS score
|
||||||
@ -163,6 +169,7 @@ export async function findSimilarStatements({
|
|||||||
factEmbedding,
|
factEmbedding,
|
||||||
threshold,
|
threshold,
|
||||||
excludeIds,
|
excludeIds,
|
||||||
|
userId,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!result || result.length === 0) {
|
if (!result || result.length === 0) {
|
||||||
@ -334,7 +341,8 @@ export async function searchStatementsByEmbedding(params: {
|
|||||||
}) {
|
}) {
|
||||||
const query = `
|
const query = `
|
||||||
MATCH (statement:Statement)
|
MATCH (statement:Statement)
|
||||||
WHERE statement.invalidAt IS NULL
|
WHERE statement.userId = $userId
|
||||||
|
AND statement.invalidAt IS NULL
|
||||||
AND statement.factEmbedding IS NOT NULL
|
AND statement.factEmbedding IS NOT NULL
|
||||||
WITH statement,
|
WITH statement,
|
||||||
CASE
|
CASE
|
||||||
@ -351,6 +359,7 @@ export async function searchStatementsByEmbedding(params: {
|
|||||||
embedding: params.embedding,
|
embedding: params.embedding,
|
||||||
minSimilarity: params.minSimilarity,
|
minSimilarity: params.minSimilarity,
|
||||||
limit: params.limit,
|
limit: params.limit,
|
||||||
|
userId: params.userId,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!result || result.length === 0) {
|
if (!result || result.length === 0) {
|
||||||
|
|||||||
@ -600,6 +600,7 @@ export class KnowledgeGraphService {
|
|||||||
const exactMatches = await findContradictoryStatements({
|
const exactMatches = await findContradictoryStatements({
|
||||||
subjectId: triple.subject.uuid,
|
subjectId: triple.subject.uuid,
|
||||||
predicateId: triple.predicate.uuid,
|
predicateId: triple.predicate.uuid,
|
||||||
|
userId: triple.provenance.userId,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (exactMatches && exactMatches.length > 0) {
|
if (exactMatches && exactMatches.length > 0) {
|
||||||
@ -612,6 +613,7 @@ export class KnowledgeGraphService {
|
|||||||
factEmbedding: triple.statement.factEmbedding,
|
factEmbedding: triple.statement.factEmbedding,
|
||||||
threshold: 0.85,
|
threshold: 0.85,
|
||||||
excludeIds: checkedStatementIds,
|
excludeIds: checkedStatementIds,
|
||||||
|
userId: triple.provenance.userId,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (semanticMatches && semanticMatches.length > 0) {
|
if (semanticMatches && semanticMatches.length > 0) {
|
||||||
|
|||||||
@ -138,7 +138,7 @@ export async function performBfsSearch(
|
|||||||
): Promise<StatementNode[]> {
|
): Promise<StatementNode[]> {
|
||||||
try {
|
try {
|
||||||
// 1. Extract potential entities from query
|
// 1. Extract potential entities from query
|
||||||
const entities = await extractEntitiesFromQuery(embedding);
|
const entities = await extractEntitiesFromQuery(embedding, userId);
|
||||||
|
|
||||||
// 2. For each entity, perform BFS traversal
|
// 2. For each entity, perform BFS traversal
|
||||||
const allStatements: StatementNode[] = [];
|
const allStatements: StatementNode[] = [];
|
||||||
@ -223,13 +223,15 @@ export async function bfsTraversal(
|
|||||||
*/
|
*/
|
||||||
export async function extractEntitiesFromQuery(
|
export async function extractEntitiesFromQuery(
|
||||||
embedding: Embedding,
|
embedding: Embedding,
|
||||||
|
userId: string,
|
||||||
): Promise<EntityNode[]> {
|
): Promise<EntityNode[]> {
|
||||||
try {
|
try {
|
||||||
// Use vector similarity to find relevant entities
|
// Use vector similarity to find relevant entities
|
||||||
const cypher = `
|
const cypher = `
|
||||||
// Match entities using vector similarity on name embeddings
|
// Match entities using vector similarity on name embeddings
|
||||||
MATCH (e:Entity)
|
MATCH (e:Entity)
|
||||||
WHERE e.nameEmbedding IS NOT NULL
|
WHERE e.nameEmbedding IS NOT NULL
|
||||||
|
AND e.userId = $userId
|
||||||
WITH e, vector.similarity.cosine(e.nameEmbedding, $embedding) AS score
|
WITH e, vector.similarity.cosine(e.nameEmbedding, $embedding) AS score
|
||||||
WHERE score > 0.7
|
WHERE score > 0.7
|
||||||
RETURN e
|
RETURN e
|
||||||
@ -239,6 +241,7 @@ export async function extractEntitiesFromQuery(
|
|||||||
|
|
||||||
const params = {
|
const params = {
|
||||||
embedding,
|
embedding,
|
||||||
|
userId,
|
||||||
};
|
};
|
||||||
|
|
||||||
const records = await runQuery(cypher, params);
|
const records = await runQuery(cypher, params);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user