Harshith Mullapudi 4a0a57cb97
Feat: add documents to the kg (#64)
* Feat: add documents to the kg

* Feat: add versioning to documents

* Fix: invalidation of evolved facts

* fix: mcp return

* fix: invalidAt is not displayed in graph popover

* Fix: use document id for the flow

* refactor: consolidate document versioning around sessionId instead of documentId

* fix: add docs link in welcome email

* fix: give more time for larger graphs to settle on

* bump: new version 0.1.20

---------

Co-authored-by: Manoj K <saimanoj58@gmail.com>
2025-09-03 12:39:46 +05:30

251 lines
7.7 KiB
TypeScript

import { runQuery } from "~/lib/neo4j.server";
import type { DocumentNode } from "@core/types";
import crypto from "crypto";
export async function saveDocument(document: DocumentNode): Promise<string> {
const query = `
MERGE (d:Document {uuid: $uuid})
ON CREATE SET
d.title = $title,
d.originalContent = $originalContent,
d.metadata = $metadata,
d.source = $source,
d.userId = $userId,
d.createdAt = $createdAt,
d.validAt = $validAt,
d.totalChunks = $totalChunks,
d.sessionId = $sessionId,
d.version = $version,
d.contentHash = $contentHash,
d.previousVersionUuid = $previousVersionUuid,
d.chunkHashes = $chunkHashes
ON MATCH SET
d.title = $title,
d.originalContent = $originalContent,
d.metadata = $metadata,
d.source = $source,
d.validAt = $validAt,
d.totalChunks = $totalChunks,
d.sessionId = $sessionId,
d.version = $version,
d.contentHash = $contentHash,
d.previousVersionUuid = $previousVersionUuid,
d.chunkHashes = $chunkHashes
RETURN d.uuid as uuid
`;
const params = {
uuid: document.uuid,
title: document.title,
originalContent: document.originalContent,
metadata: JSON.stringify(document.metadata || {}),
source: document.source,
userId: document.userId || null,
createdAt: document.createdAt.toISOString(),
validAt: document.validAt.toISOString(),
totalChunks: document.totalChunks || 0,
sessionId: document.sessionId || null,
version: document.version || 1,
contentHash: document.contentHash,
previousVersionUuid: document.previousVersionUuid || null,
chunkHashes: document.chunkHashes || [],
};
const result = await runQuery(query, params);
return result[0].get("uuid");
}
export async function linkEpisodeToDocument(
episodeUuid: string,
documentUuid: string,
chunkIndex: number,
): Promise<void> {
const query = `
MATCH (e:Episode {uuid: $episodeUuid})
MATCH (d:Document {uuid: $documentUuid})
MERGE (d)-[r:CONTAINS_CHUNK {chunkIndex: $chunkIndex}]->(e)
SET e.chunkIndex = $chunkIndex
RETURN r
`;
const params = {
episodeUuid,
documentUuid,
chunkIndex,
};
await runQuery(query, params);
}
export async function getDocument(
documentUuid: string,
): Promise<DocumentNode | null> {
const query = `
MATCH (d:Document {uuid: $uuid})
RETURN d
`;
const params = { uuid: documentUuid };
const result = await runQuery(query, params);
if (result.length === 0) return null;
const record = result[0];
const documentNode = record.get("d");
return {
uuid: documentNode.properties.uuid,
title: documentNode.properties.title,
originalContent: documentNode.properties.originalContent,
metadata: JSON.parse(documentNode.properties.metadata || "{}"),
source: documentNode.properties.source,
userId: documentNode.properties.userId,
createdAt: new Date(documentNode.properties.createdAt),
validAt: new Date(documentNode.properties.validAt),
totalChunks: documentNode.properties.totalChunks,
version: documentNode.properties.version || 1,
contentHash: documentNode.properties.contentHash || "",
previousVersionUuid: documentNode.properties.previousVersionUuid || null,
chunkHashes: documentNode.properties.chunkHashes || [],
};
}
export async function getDocumentEpisodes(documentUuid: string): Promise<
Array<{
episodeUuid: string;
chunkIndex: number;
content: string;
}>
> {
const query = `
MATCH (d:Document {uuid: $uuid})-[r:CONTAINS_CHUNK]->(e:Episode)
RETURN e.uuid as episodeUuid, r.chunkIndex as chunkIndex, e.content as content
ORDER BY r.chunkIndex ASC
`;
const params = { uuid: documentUuid };
const result = await runQuery(query, params);
return result.map((record) => ({
episodeUuid: record.get("episodeUuid"),
chunkIndex: record.get("chunkIndex"),
content: record.get("content"),
}));
}
export async function getUserDocuments(
userId: string,
limit: number = 50,
): Promise<DocumentNode[]> {
const query = `
MATCH (d:Document {userId: $userId})
RETURN d
ORDER BY d.createdAt DESC
LIMIT $limit
`;
const params = { userId, limit };
const result = await runQuery(query, params);
return result.map((record) => {
const documentNode = record.get("d");
return {
uuid: documentNode.properties.uuid,
title: documentNode.properties.title,
originalContent: documentNode.properties.originalContent,
metadata: JSON.parse(documentNode.properties.metadata || "{}"),
source: documentNode.properties.source,
userId: documentNode.properties.userId,
createdAt: new Date(documentNode.properties.createdAt),
validAt: new Date(documentNode.properties.validAt),
totalChunks: documentNode.properties.totalChunks,
version: documentNode.properties.version || 1,
contentHash: documentNode.properties.contentHash || "",
previousVersionUuid: documentNode.properties.previousVersionUuid || null,
chunkHashes: documentNode.properties.chunkHashes || [],
};
});
}
/**
* Generate content hash for document versioning
*/
export function generateContentHash(content: string): string {
return crypto.createHash("sha256").update(content, "utf8").digest("hex");
}
/**
* Find existing document by documentId and userId for version comparison
*/
export async function findExistingDocument(
sessionId: string,
userId: string,
): Promise<DocumentNode | null> {
const query = `
MATCH (d:Document {sessionId: $sessionId, userId: $userId})
RETURN d
ORDER BY d.version DESC
LIMIT 1
`;
const params = { sessionId, userId };
const result = await runQuery(query, params);
if (result.length === 0) return null;
const documentNode = result[0].get("d");
return {
uuid: documentNode.properties.uuid,
title: documentNode.properties.title,
originalContent: documentNode.properties.originalContent,
metadata: JSON.parse(documentNode.properties.metadata || "{}"),
source: documentNode.properties.source,
userId: documentNode.properties.userId,
createdAt: new Date(documentNode.properties.createdAt),
validAt: new Date(documentNode.properties.validAt),
totalChunks: documentNode.properties.totalChunks,
version: documentNode.properties.version || 1,
contentHash: documentNode.properties.contentHash || "",
previousVersionUuid: documentNode.properties.previousVersionUuid || null,
chunkHashes: documentNode.properties.chunkHashes || [],
};
}
/**
* Get document version history
*/
export async function getDocumentVersions(
sessionId: string,
userId: string,
limit: number = 10,
): Promise<DocumentNode[]> {
const query = `
MATCH (d:Document {sessionId: $sessionId, userId: $userId})
RETURN d
ORDER BY d.version DESC
LIMIT $limit
`;
const params = { sessionId, userId, limit };
const result = await runQuery(query, params);
return result.map((record) => {
const documentNode = record.get("d");
return {
uuid: documentNode.properties.uuid,
title: documentNode.properties.title,
originalContent: documentNode.properties.originalContent,
metadata: JSON.parse(documentNode.properties.metadata || "{}"),
source: documentNode.properties.source,
userId: documentNode.properties.userId,
createdAt: new Date(documentNode.properties.createdAt),
validAt: new Date(documentNode.properties.validAt),
totalChunks: documentNode.properties.totalChunks,
version: documentNode.properties.version || 1,
contentHash: documentNode.properties.contentHash || "",
previousVersionUuid: documentNode.properties.previousVersionUuid || null,
chunkHashes: documentNode.properties.chunkHashes || [],
};
});
}