-      <div className="flex flex-col gap-1">
+      <div className="relative flex flex-col gap-1">
+        {isConnected && (
+          <Badge variant="secondary" className="absolute right-2 top-2">
+            Connected
+          </Badge>
+        )}
         <div className="font-medium">{integration.name}</div>
         <div className="text-sm text-muted-foreground">
           {integration.description || `Connect to ${integration.name}`}
         </div>
-        {isConnected && (
-          <Badge variant="secondary">Connected</Badge>
-        )}
       </div>
);
diff --git a/apps/webapp/app/components/integrations/utils.tsx b/apps/webapp/app/components/integrations/utils.tsx
new file mode 100644
index 0000000..05ac9b3
--- /dev/null
+++ b/apps/webapp/app/components/integrations/utils.tsx
@@ -0,0 +1,34 @@
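+// Built-in integrations that are always listed alongside the DB-backed
+// integration definitions (merged in the home.integrations loader below).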
+export const FIXED_INTEGRATIONS = [
+ {
+ id: "claude",
+ name: "Claude",
+ description: "AI assistant for coding, writing, and analysis",
+ icon: "claude",
+ slug: "claude",
+ spec: {},
+ },
+ {
+ id: "cursor",
+ name: "Cursor",
+ description: "AI-powered code editor",
+ icon: "cursor",
+ slug: "cursor",
+ spec: {},
+ },
+ {
+ id: "cline",
+ name: "Cline",
+ description: "AI coding assistant for terminal and command line",
+ icon: "cline",
+ slug: "cline",
+ spec: {},
+ },
+ {
+ id: "vscode",
+ name: "Visual Studio Code",
+ description: "Popular code editor with extensive extensions",
+ icon: "vscode",
+ slug: "vscode",
+ spec: {},
+ },
+];
diff --git a/apps/webapp/app/components/logs/log-text-collapse.tsx b/apps/webapp/app/components/logs/log-text-collapse.tsx
index dfc6123..5ad3eb0 100644
--- a/apps/webapp/app/components/logs/log-text-collapse.tsx
+++ b/apps/webapp/app/components/logs/log-text-collapse.tsx
@@ -4,6 +4,7 @@ import { AlertCircle, Info, Trash } from "lucide-react";
import { cn } from "~/lib/utils";
import { Dialog, DialogContent, DialogHeader, DialogTitle } from "../ui/dialog";
import { Button } from "../ui";
+import { Popover, PopoverContent, PopoverTrigger } from "../ui/popover";
import {
AlertDialog,
AlertDialogAction,
@@ -15,21 +16,42 @@ import {
AlertDialogTitle,
AlertDialogTrigger,
} from "../ui/alert-dialog";
+import { Badge } from "../ui/badge";
+import { type LogItem } from "~/hooks/use-logs";
interface LogTextCollapseProps {
text?: string;
error?: string;
logData: any;
+ log: LogItem;
id: string;
episodeUUID?: string;
}
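+
+// Maps an ingestion status to badge styling, e.g. getStatusColor("FAILED")
+// returns the destructive variant; unknown statuses fall back to gray.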
+const getStatusColor = (status: string) => {
+ switch (status) {
+ case "PROCESSING":
+ return "bg-blue-100 text-blue-800 hover:bg-blue-100 hover:text-blue-800";
+ case "PENDING":
+ return "bg-yellow-100 text-yellow-800 hover:bg-yellow-100 hover:text-yellow-800";
+ case "COMPLETED":
+ return "bg-success/10 text-success hover:bg-success/10 hover:text-success";
+ case "FAILED":
+ return "bg-destructive/10 text-destructive hover:bg-destructive/10 hover:text-destructive";
+ case "CANCELLED":
+ return "bg-gray-100 text-gray-800 hover:bg-gray-100 hover:text-gray-800";
+ default:
+ return "bg-gray-100 text-gray-800 hover:bg-gray-100 hover:text-gray-800";
+ }
+};
+
export function LogTextCollapse({
episodeUUID,
text,
error,
id,
logData,
+ log,
}: LogTextCollapseProps) {
const [dialogOpen, setDialogOpen] = useState(false);
const [deleteDialogOpen, setDeleteDialogOpen] = useState(false);
@@ -75,19 +97,28 @@ export function LogTextCollapse({
}
return (
-    <>
-      <div className="text-sm" onClick={() => setDialogOpen(true)}>
-        {text}
-        {isLong && (
-          <>...</>
-        )}
-      </div>
-      {episodeUUID && (
-        <AlertDialog
-          open={deleteDialogOpen}
-          onOpenChange={setDeleteDialogOpen}
-        >
-          <AlertDialogTrigger asChild>
-            <Button variant="ghost" size="sm">
-              <Trash size={14} />
-            </Button>
-          </AlertDialogTrigger>
-          <AlertDialogContent>
-            <AlertDialogHeader>
-              <AlertDialogTitle>Delete Episode</AlertDialogTitle>
-              <AlertDialogDescription>
-                Are you sure you want to delete this episode? This action
-                cannot be undone.
-              </AlertDialogDescription>
-            </AlertDialogHeader>
-            <AlertDialogFooter>
-              <AlertDialogCancel>Cancel</AlertDialogCancel>
-              <AlertDialogAction onClick={handleDelete}>
-                Continue
-              </AlertDialogAction>
-            </AlertDialogFooter>
-          </AlertDialogContent>
-        </AlertDialog>
-      )}
-      {error && <div className="text-destructive">{error}</div>}
-    </>
+    <Popover>
+      <PopoverTrigger asChild>
+        <div
+          className={cn("cursor-pointer text-sm", error && "text-destructive")}
+          onClick={() => setDialogOpen(true)}
+        >
+          {text}
+        </div>
+      </PopoverTrigger>
+      <PopoverContent className="w-auto p-2">
+        <div className="flex items-center gap-2">
+          <Badge className={getStatusColor(log.status)}>
+            {log.status.charAt(0).toUpperCase() +
+              log.status.slice(1).toLowerCase()}
+          </Badge>
+          <span className="text-xs text-muted-foreground">
+            {new Date(log.time).toLocaleString()}
+          </span>
+          {episodeUUID && (
+            <AlertDialog
+              open={deleteDialogOpen}
+              onOpenChange={setDeleteDialogOpen}
+            >
+              <AlertDialogTrigger asChild>
+                <Button variant="ghost" size="sm">
+                  <Trash size={14} />
+                </Button>
+              </AlertDialogTrigger>
+              <AlertDialogContent>
+                <AlertDialogHeader>
+                  <AlertDialogTitle>Delete Episode</AlertDialogTitle>
+                  <AlertDialogDescription>
+                    Are you sure you want to delete this episode? This
+                    action cannot be undone.
+                  </AlertDialogDescription>
+                </AlertDialogHeader>
+                <AlertDialogFooter>
+                  <AlertDialogCancel>Cancel</AlertDialogCancel>
+                  <AlertDialogAction onClick={handleDelete}>
+                    Continue
+                  </AlertDialogAction>
+                </AlertDialogFooter>
+              </AlertDialogContent>
+            </AlertDialog>
+          )}
+        </div>
+      </PopoverContent>
+    </Popover>
);
}
diff --git a/apps/webapp/app/components/logs/logs-filters.tsx b/apps/webapp/app/components/logs/logs-filters.tsx
index 4d391f5..4ee1d47 100644
--- a/apps/webapp/app/components/logs/logs-filters.tsx
+++ b/apps/webapp/app/components/logs/logs-filters.tsx
@@ -51,7 +51,7 @@ export function LogsFilters({
const handleBack = () => setStep("main");
return (
- ();
+function CustomIntegrationContent({ integration }: { integration: any }) {
+ const memoryUrl = `https://core.heysol.ai/api/v1/mcp/memory?source=${integration.slug}`;
+ const [copied, setCopied] = useState(false);
+ const copyToClipboard = async () => {
+ try {
+ await navigator.clipboard.writeText(memoryUrl);
+ setCopied(true);
+ setTimeout(() => setCopied(false), 2000);
+ } catch (err) {
+ console.error("Failed to copy:", err);
+ }
+ };
+
+ const getCustomContent = () => {
+ switch (integration.id) {
+ case "claude":
+ return {
+ title: "About Claude",
+ content: (
+          <div className="space-y-4">
+            <p>
+              Claude is an AI assistant created by Anthropic. It can help with
+              a wide variety of tasks including:
+            </p>
+            <ul className="list-disc pl-5">
+              <li>Code generation and debugging</li>
+              <li>Writing and editing</li>
+              <li>Analysis and research</li>
+              <li>Problem-solving</li>
+            </ul>
+            <p className="text-sm text-muted-foreground">
+              For Claude Web, Desktop, and Code - OAuth authentication handled
+              automatically
+            </p>
+            <div className="flex items-center gap-2">
+              <code className="flex-1 truncate rounded bg-muted p-2 text-sm">
+                {memoryUrl}
+              </code>
+              <Button variant="secondary" size="sm" onClick={copyToClipboard}>
+                {copied ? "Copied" : "Copy"}
+              </Button>
+            </div>
+          </div>
+ ),
+ };
+ case "cursor":
+ return {
+ title: "About Cursor",
+ content: (
+          <div className="space-y-4">
+            <p>
+              Cursor is an AI-powered code editor that helps developers write
+              code faster and more efficiently.
+            </p>
+            <ul className="list-disc pl-5">
+              <li>AI-powered code completion</li>
+              <li>Natural language to code conversion</li>
+              <li>Code explanation and debugging</li>
+              <li>Refactoring assistance</li>
+            </ul>
+            <pre className="rounded bg-muted p-2 text-sm">
+              {JSON.stringify(
+                {
+                  memory: {
+                    url: memoryUrl,
+                  },
+                },
+                null,
+                2,
+              )}
+            </pre>
+            <Button variant="secondary" size="sm" onClick={copyToClipboard}>
+              {copied ? "Copied" : "Copy URL"}
+            </Button>
+          </div>
+ ),
+ };
+ case "cline":
+ return {
+ title: "About Cline",
+ content: (
+          <div className="space-y-4">
+            <p>
+              Cline is an AI coding assistant that works directly in your
+              terminal and command line environment.
+            </p>
+            <ul className="list-disc pl-5">
+              <li>Command line AI assistance</li>
+              <li>Terminal-based code generation</li>
+              <li>Shell script optimization</li>
+              <li>DevOps automation help</li>
+            </ul>
+            <div className="flex items-center gap-2">
+              <code className="flex-1 truncate rounded bg-muted p-2 text-sm">
+                {memoryUrl}
+              </code>
+              <Button variant="secondary" size="sm" onClick={copyToClipboard}>
+                {copied ? "Copied" : "Copy"}
+              </Button>
+            </div>
+          </div>
+ };
+ case "vscode":
+ return {
+ title: "About Visual Studio Code",
+ content: (
+          <div className="space-y-4">
+            <p>
+              Visual Studio Code is a lightweight but powerful source code
+              editor with extensive extension support.
+            </p>
+            <ul className="list-disc pl-5">
+              <li>Intelligent code completion</li>
+              <li>Built-in Git integration</li>
+              <li>Extensive extension marketplace</li>
+              <li>Debugging and testing tools</li>
+            </ul>
+            <p>You need to enable MCP in settings</p>
+            <pre className="rounded bg-muted p-2 text-sm">
+              {JSON.stringify(
+                {
+                  "chat.mcp.enabled": true,
+                  "chat.mcp.discovery.enabled": true,
+                },
+                null,
+                2,
+              )}
+            </pre>
+            <pre className="rounded bg-muted p-2 text-sm">
+              {JSON.stringify(
+                {
+                  memory: {
+                    type: "http",
+                    url: memoryUrl,
+                  },
+                },
+                null,
+                2,
+              )}
+            </pre>
+            <Button variant="secondary" size="sm" onClick={copyToClipboard}>
+              {copied ? "Copied" : "Copy URL"}
+            </Button>
+          </div>
+ ),
+ };
+ default:
+ return null;
+ }
+ };
+
+ const customContent = getCustomContent();
+
+ if (!customContent) return null;
+ const Component = getIcon(integration.icon as IconType);
+
+ return (
+    <>
+      <PageHeader
+        title={customContent.title}
+        actions={[
+          {
+            label: "Report an issue",
+            onClick: () =>
+              window.open(
+                "https://github.com/redplanethq/core/issues/new",
+                "_blank",
+              ),
+            variant: "secondary",
+          },
+        ]}
+      />
+      <div className="flex items-center gap-2 p-4">
+        <Component size={24} />
+        <h2 className="text-lg font-medium">{integration.name}</h2>
+      </div>
+      <div className="p-4">{customContent.content}</div>
+    </>
+ );
+}
+
+interface IntegrationDetailProps {
+ integration: any;
+ integrationAccounts: any;
+ ingestionRule: any;
+}
+
+export function IntegrationDetail({
+ integration,
+ integrationAccounts,
+ ingestionRule,
+}: IntegrationDetailProps) {
const activeAccount = useMemo(
() =>
integrationAccounts.find(
- (acc) => acc.integrationDefinitionId === integration.id && acc.isActive,
+ (acc: IntegrationAccount) =>
+ acc.integrationDefinitionId === integration.id && acc.isActive,
),
[integrationAccounts, integration.id],
);
@@ -181,21 +492,21 @@ export default function IntegrationDetail() {
{hasApiKey && (
-
+
API Key authentication
)}
{hasOAuth2 && (
-
+
OAuth 2.0 authentication
)}
{!hasApiKey && !hasOAuth2 && !hasMCPAuth && (
-
+
No authentication method specified
)}
@@ -226,7 +537,7 @@ export default function IntegrationDetail() {
)}
{/* Connected Account Info */}
-
+
{/* MCP Authentication Section */}
);
}
+
+export default function IntegrationDetailWrapper() {
+ const { integration, integrationAccounts, ingestionRule } =
+    useLoaderData<typeof loader>();
+
+ const { slug } = useParams();
+
+  const fixedIntegration = FIXED_INTEGRATIONS.find(
+    (fixedInt) => fixedInt.slug === slug,
+  );
+
+  return (
+    <>
+      {fixedIntegration ? (
+        <CustomIntegrationContent integration={fixedIntegration} />
+      ) : (
+        <IntegrationDetail
+          integration={integration}
+          integrationAccounts={integrationAccounts}
+          ingestionRule={ingestionRule}
+        />
+      )}
+    </>
+ );
+}
diff --git a/apps/webapp/app/routes/home.integrations.tsx b/apps/webapp/app/routes/home.integrations.tsx
index 1a8af63..65a83e9 100644
--- a/apps/webapp/app/routes/home.integrations.tsx
+++ b/apps/webapp/app/routes/home.integrations.tsx
@@ -8,6 +8,7 @@ import { getIntegrationAccounts } from "~/services/integrationAccount.server";
import { IntegrationGrid } from "~/components/integrations/integration-grid";
import { PageHeader } from "~/components/common/page-header";
import { Plus } from "lucide-react";
+import { FIXED_INTEGRATIONS } from "~/components/integrations/utils";
export async function loader({ request }: LoaderFunctionArgs) {
const userId = await requireUserId(request);
@@ -18,8 +19,11 @@ export async function loader({ request }: LoaderFunctionArgs) {
getIntegrationAccounts(userId),
]);
+ // Combine fixed integrations with dynamic ones
+ const allIntegrations = [...FIXED_INTEGRATIONS, ...integrationDefinitions];
+
return json({
- integrationDefinitions,
+ integrationDefinitions: allIntegrations,
integrationAccounts,
userId,
});
diff --git a/apps/webapp/app/routes/home.logs.all.tsx b/apps/webapp/app/routes/home.logs.all.tsx
index 439c520..dc96362 100644
--- a/apps/webapp/app/routes/home.logs.all.tsx
+++ b/apps/webapp/app/routes/home.logs.all.tsx
@@ -1,5 +1,5 @@
-import { useState } from "react";
-import { useNavigate } from "@remix-run/react";
+import { useState, useEffect } from "react";
+import { useNavigate, useFetcher } from "@remix-run/react";
import { useLogs } from "~/hooks/use-logs";
import { LogsFilters } from "~/components/logs/logs-filters";
import { VirtualLogsList } from "~/components/logs/virtual-logs-list";
@@ -7,11 +7,13 @@ import { AppContainer, PageContainer } from "~/components/layout/app-layout";
import { Card, CardContent } from "~/components/ui/card";
import { Database, LoaderCircle } from "lucide-react";
import { PageHeader } from "~/components/common/page-header";
+import { ContributionGraph } from "~/components/activity/contribution-graph";
export default function LogsAll() {
const navigate = useNavigate();
   const [selectedSource, setSelectedSource] = useState<string | undefined>();
   const [selectedStatus, setSelectedStatus] = useState<string | undefined>();
+ const contributionFetcher = useFetcher();
const {
logs,
@@ -26,17 +28,41 @@ export default function LogsAll() {
status: selectedStatus,
});
+ // Fetch contribution data on mount
+ useEffect(() => {
+ if (contributionFetcher.state === "idle" && !contributionFetcher.data) {
+ contributionFetcher.load("/api/v1/activity/contribution");
+ }
+ }, [contributionFetcher]);
+
+ // Get contribution data from fetcher
+ const contributionData = contributionFetcher.data?.success
+ ? contributionFetcher.data.data.contributionData
+ : [];
+ const totalActivities = contributionFetcher.data?.success
+ ? contributionFetcher.data.data.totalActivities
+ : 0;
+ const isContributionLoading =
+ contributionFetcher.state === "loading" || !contributionFetcher.data;
+
return (
<>
-
+
+      {/* Contribution Graph */}
+      <div className="px-4">
+        {isContributionLoading ? (
+          <LoaderCircle className="h-4 w-4 animate-spin" />
+        ) : (
+          <ContributionGraph
+            contributionData={contributionData}
+            totalActivities={totalActivities}
+          />
+        )}
+      </div>
+
{isInitialLoad ? (
<>
-
{" "}
+
         </>
) : (
<>
- {" "}
{/* Filters */}
{logs.length > 0 && (
("Claude");
const [form, fields] = useForm({
lastSubmission: lastSubmission as any,
@@ -117,7 +121,12 @@ export default function Onboarding() {
},
});
- const memoryUrl = "https://core.heysol.ai/api/v1/mcp/memory";
+ const getMemoryUrl = (source: "Claude" | "Cursor" | "Other") => {
+ const baseUrl = "https://core.heysol.ai/api/v1/mcp/memory";
+ return `${baseUrl}?Source=${source}`;
+ };
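+  // e.g. getMemoryUrl("Cursor") -> "https://core.heysol.ai/api/v1/mcp/memory?Source=Cursor"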
+
+ const memoryUrl = getMemoryUrl(selectedSource);
const copyToClipboard = async () => {
try {
@@ -144,7 +153,25 @@ export default function Onboarding() {
-
+
+          <div className="flex gap-2">
+            {(["Claude", "Cursor", "Other"] as const).map((source) => (
+              <Button
+                key={source}
+                type="button"
+                variant={selectedSource === source ? "default" : "outline"}
+                onClick={() => setSelectedSource(source)}
+              >
+                {source}
+              </Button>
+            ))}
+          </div>
+
+  /**
+   * Build a SIMILAR_TO similarity graph between statements that share entities
+   */
+  async createStatementSimilarityGraph(
+    userId: string,
+    incremental: boolean = false,
+  ): Promise<void> {
+ logger.info(
+ `Creating statement similarity graph for clustering (${incremental ? "incremental" : "complete"})`,
+ );
+
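+    // Statements sharing at least one entity get a SIMILAR_TO edge whose
+    // weight is twice the number of shared entities (e.g. 2 shared -> weight 4).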
+ const query = `
+ MATCH (s1:Statement)-[:HAS_SUBJECT|HAS_PREDICATE|HAS_OBJECT]->(e:Entity)<-[:HAS_SUBJECT|HAS_PREDICATE|HAS_OBJECT]-(s2:Statement)
+ WHERE s1.userId = $userId
+ AND s2.userId = $userId
+ AND s1.invalidAt IS NULL
+ AND s2.invalidAt IS NULL
+ AND id(s1) < id(s2)
+ WITH s1, s2, collect(DISTINCT e.uuid) as sharedEntities
+ WHERE size(sharedEntities) > 0
+ MERGE (s1)-[r:SIMILAR_TO]-(s2)
+ SET r.weight = size(sharedEntities) * 2,
+ r.sharedEntities = sharedEntities,
+ r.createdAt = datetime()
+ RETURN count(r) as edgesCreated
+ `;
+ const result = await runQuery(query, { userId });
+ const edgesCreated = result[0]?.get("edgesCreated") || 0;
+
+ logger.info(
+ `${incremental ? "Updated" : "Created"} ${edgesCreated} similarity edges between statements`,
+ );
+ }
+
+ /**
+ * Execute Leiden algorithm for community detection on statement similarity graph
+ */
+ async executeLeidenClustering(
+ userId: string,
+ incremental: boolean = false,
+  ): Promise<void> {
+ logger.info(
+ `Executing Leiden clustering algorithm (${incremental ? "incremental" : "complete"})`,
+ );
+
+ // Create/update the similarity graph
+ await this.createStatementSimilarityGraph(userId, incremental);
+
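+    // GDS lifecycle: project an in-memory graph over the SIMILAR_TO edges,
+    // run Leiden (writing tempClusterId onto each Statement), then drop the
+    // projection so it does not linger in the graph catalog.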
+ const clusteringQuery = `
+ MATCH (source:Statement) WHERE source.userId = $userId
+ OPTIONAL MATCH (source)-[r:SIMILAR_TO]->(target:Statement)
+ WHERE target.userId = $userId
+ WITH gds.graph.project(
+ 'statementSimilarity_' + $userId,
+ source,
+ target,
+ {
+ relationshipProperties: r { .weight }
+ },
+ { undirectedRelationshipTypes: ['*'] }
+ ) AS g
+
+ CALL gds.leiden.write(
+ g.graphName,
+ {
+ writeProperty: 'tempClusterId',
+ relationshipWeightProperty: 'weight',
+          gamma: $gamma,
+          maxLevels: $maxLevels,
+          tolerance: $tolerance
+ }
+ )
+ YIELD communityCount
+
+ CALL gds.graph.drop(g.graphName)
+ YIELD graphName as droppedGraphName
+
+ RETURN communityCount, g.nodeCount, g.relationshipCount
+ `;
+
+ const result = await runQuery(clusteringQuery, {
+ userId,
+ gamma: this.LEIDEN_GAMMA,
+ maxLevels: this.LEIDEN_MAX_LEVELS,
+ tolerance: this.LEIDEN_TOLERANCE,
+ });
+
+ const communityCount = result[0]?.get("communityCount") || 0;
+ logger.info(`Leiden clustering found ${communityCount} communities`);
+
+ // Filter clusters by minimum size and assign final cluster IDs
+ await this.filterAndAssignClusters(userId, incremental);
+
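+    // SIMILAR_TO edges are only needed as Leiden input; drop them so the
+    // next run rebuilds similarity from scratch.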
+ const removeRelationsQuery = `
+ MATCH (s1:Statement)-[r:SIMILAR_TO]-(s2:Statement)
+ WHERE s1.userId = $userId AND s2.userId = $userId
+ DELETE r`;
+
+ await runQuery(removeRelationsQuery, { userId });
+ }
+
+ /**
+ * Perform incremental clustering for new statements
+ */
+ async performIncrementalClustering(userId: string): Promise<{
+ newStatementsProcessed: number;
+ newClustersCreated: number;
+ }> {
+ logger.info(`Starting incremental clustering for user ${userId}`);
+
+ try {
+ // Check if there are unclustered statements
+ const unclusteredQuery = `
+ MATCH (s:Statement)
+ WHERE s.userId = $userId AND s.clusterId IS NULL AND s.invalidAt IS NULL
+ RETURN count(s) as unclusteredCount
+ `;
+
+ const unclusteredResult = await runQuery(unclusteredQuery, { userId });
+ const unclusteredCount =
+ unclusteredResult[0]?.get("unclusteredCount") || 0;
+
+ if (unclusteredCount === 0) {
+ logger.info(
+ "No unclustered statements found, skipping incremental clustering",
+ );
+ return {
+ newStatementsProcessed: 0,
+ newClustersCreated: 0,
+ };
+ }
+
+ logger.info(`Found ${unclusteredCount} unclustered statements`);
+
+ let newClustersCreated = 0;
+ // Run incremental clustering on remaining statements
+ await this.executeLeidenClustering(userId, true);
+ await this.createClusterNodes(userId);
+
+ // Count new clusters created
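+      // (heuristic: clusters stamped within the last five minutes count as new)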
+ const newClustersQuery = `
+ MATCH (c:Cluster)
+ WHERE c.userId = $userId AND c.createdAt > datetime() - duration('PT5M')
+ RETURN count(c) as newClusters
+ `;
+ const newClustersResult = await runQuery(newClustersQuery, { userId });
+ newClustersCreated = newClustersResult[0]?.get("newClusters") || 0;
+
+      const drift = await this.detectClusterDrift(userId);
+      let newClustersCreatedDrift = 0;
+      if (drift.driftDetected) {
+        logger.info("Cluster drift detected, evolving clusters");
+        // Assign through a renamed binding; destructuring into a shadowing
+        // const would leave the outer counter stuck at 0.
+        const { newClustersCreated: driftClusters, splitClusters } =
+          await this.handleClusterDrift(userId);
+        newClustersCreatedDrift = driftClusters;
+
+ if (splitClusters.length > 0) {
+ logger.info("Split clusters detected, evolving clusters");
+ }
+ }
+
+ return {
+ newStatementsProcessed: unclusteredCount,
+ newClustersCreated: newClustersCreated + newClustersCreatedDrift,
+ };
+ } catch (error) {
+ logger.error("Error in incremental clustering:", { error });
+ throw error;
+ }
+ }
+
+ /**
+ * Perform complete clustering (for new users or full rebuilds)
+ */
+ async performCompleteClustering(userId: string): Promise<{
+ clustersCreated: number;
+ statementsProcessed: number;
+ }> {
+ logger.info(`Starting complete clustering for user ${userId}`);
+
+ try {
+ // Clear any existing cluster assignments
+ await runQuery(
+ `
+ MATCH (s:Statement)
+ WHERE s.userId = $userId
+ REMOVE s.clusterId, s.tempClusterId
+ `,
+ { userId },
+ );
+
+ // Clear statement-to-statement similarity relationships
+ await runQuery(
+ `
+ MATCH (s1:Statement)-[r:SIMILAR_TO]-(s2:Statement)
+ WHERE s1.userId = $userId AND s2.userId = $userId
+ DELETE r
+ `,
+ { userId },
+ );
+
+ // Clear existing cluster nodes
+ await runQuery(
+ `
+ MATCH (c:Cluster)
+ WHERE c.userId = $userId
+ DETACH DELETE c
+ `,
+ { userId },
+ );
+
+ // Execute complete clustering pipeline
+ await this.executeLeidenClustering(userId, false);
+ await this.createClusterNodes(userId);
+
+ // Get results
+ const resultsQuery = `
+ MATCH (c:Cluster) WHERE c.userId = $userId
+ WITH count(c) as clusters
+ MATCH (s:Statement) WHERE s.userId = $userId AND s.clusterId IS NOT NULL
+ RETURN clusters, count(s) as statementsProcessed
+ `;
+
+ const results = await runQuery(resultsQuery, { userId });
+ const clustersCreated = results[0]?.get("clusters") || 0;
+ const statementsProcessed = results[0]?.get("statementsProcessed") || 0;
+
+ logger.info(
+ `Complete clustering finished: ${clustersCreated} clusters, ${statementsProcessed} statements processed`,
+ );
+
+ return { clustersCreated, statementsProcessed };
+ } catch (error) {
+ logger.error("Error in complete clustering:", { error });
+ throw error;
+ }
+ }
+
+ /**
+ * Filter clusters by minimum size and assign final cluster IDs
+ */
+ private async filterAndAssignClusters(
+ userId: string,
+ incremental: boolean = false,
+  ): Promise<void> {
+ const filterQuery = `
+ // Step 1: Get all temp cluster groups and their total sizes
+ MATCH (s:Statement)
+ WHERE s.userId = $userId AND s.tempClusterId IS NOT NULL
+ WITH s.tempClusterId as tempId, collect(s) as allStatements
+
+ // Step 2: Filter by minimum size
+ WHERE size(allStatements) >= $minSize
+
+ // Step 3: Check if any statements already have a permanent clusterId
+ WITH tempId, allStatements,
+ [stmt IN allStatements WHERE stmt.clusterId IS NOT NULL] as existingClustered,
+ [stmt IN allStatements WHERE stmt.clusterId IS NULL] as newStatements
+
+ // Step 4: Determine the final cluster ID
+ WITH tempId, allStatements, existingClustered, newStatements,
+ CASE
+ WHEN size(existingClustered) > 0 THEN existingClustered[0].clusterId
+ ELSE toString(randomUUID())
+ END as finalClusterId
+
+ // Step 5: Assign cluster ID to new statements (handles empty arrays gracefully)
+ FOREACH (stmt IN newStatements |
+ SET stmt.clusterId = finalClusterId
+ REMOVE stmt.tempClusterId
+ )
+
+ // Step 6: Clean up temp IDs from existing statements
+ FOREACH (existingStmt IN existingClustered |
+ REMOVE existingStmt.tempClusterId
+ )
+
+ RETURN count(DISTINCT finalClusterId) as validClusters
+ `;
+
+ const result = await runQuery(filterQuery, {
+ userId,
+ minSize: this.MIN_CLUSTER_SIZE,
+ });
+
+ // Remove temp cluster IDs from statements that didn't meet minimum size
+ await runQuery(
+ `
+ MATCH (s:Statement)
+ WHERE s.userId = $userId AND s.tempClusterId IS NOT NULL
+ REMOVE s.tempClusterId
+ `,
+ { userId },
+ );
+
+ const validClusters = result[0]?.get("validClusters") || 0;
+
+ if (incremental) {
+ await this.updateClusterEmbeddings(userId);
+ }
+ logger.info(
+ `${incremental ? "Updated" : "Created"} ${validClusters} valid clusters after size filtering`,
+ );
+ }
+
+ /**
+ * Create Cluster nodes with metadata (hybrid storage approach)
+ * Only creates cluster nodes for cluster IDs that don't already exist
+ */
+  async createClusterNodes(userId: string): Promise<void> {
+ logger.info("Creating cluster metadata nodes for new clusters only");
+
+ const query = `
+ MATCH (s:Statement)
+ WHERE s.userId = $userId AND s.clusterId IS NOT NULL
+ WITH s.clusterId as clusterId, collect(s) as statements
+
+ // Only process cluster IDs that don't already have a Cluster node
+ WHERE NOT EXISTS {
+ MATCH (existing:Cluster {uuid: clusterId, userId: $userId})
+ }
+
+ // Get representative entities for naming
+ UNWIND statements as stmt
+ MATCH (stmt)-[:HAS_SUBJECT]->(subj:Entity)
+ MATCH (stmt)-[:HAS_PREDICATE]->(pred:Entity)
+ MATCH (stmt)-[:HAS_OBJECT]->(obj:Entity)
+
+ WITH clusterId, statements,
+ collect(DISTINCT subj.name) as subjects,
+ collect(DISTINCT pred.name) as predicates,
+ collect(DISTINCT obj.name) as objects
+
+ // Get top 10 most frequent entities of each type
+ WITH clusterId, statements,
+ apoc.coll.frequencies(subjects)[0..10] as topSubjects,
+ apoc.coll.frequencies(predicates)[0..10] as topPredicates,
+ apoc.coll.frequencies(objects)[0..10] as topObjects
+
+ // Calculate cluster embedding as average of statement embeddings
+ WITH clusterId, statements, topSubjects, topPredicates, topObjects,
+ [stmt IN statements WHERE stmt.factEmbedding IS NOT NULL | stmt.factEmbedding] as validEmbeddings
+
+ // Calculate average embedding (centroid)
+ WITH clusterId, statements, topSubjects, topPredicates, topObjects, validEmbeddings,
+ CASE
+ WHEN size(validEmbeddings) > 0 THEN
+ reduce(avg = [i IN range(0, size(validEmbeddings[0])-1) | 0.0],
+ embedding IN validEmbeddings |
+ [i IN range(0, size(embedding)-1) | avg[i] + embedding[i] / size(validEmbeddings)])
+ ELSE null
+ END as clusterEmbedding
+
+ CREATE (c:Cluster {
+ uuid: clusterId,
+ size: size(statements),
+ createdAt: datetime(),
+ userId: $userId,
+ topSubjects: [item in topSubjects | item.item],
+ topPredicates: [item in topPredicates | item.item],
+ topObjects: [item in topObjects | item.item],
+ clusterEmbedding: clusterEmbedding,
+ embeddingCount: size(validEmbeddings),
+ needsNaming: true
+ })
+
+ RETURN count(c) as clustersCreated
+ `;
+
+ const result = await runQuery(query, { userId });
+ const clustersCreated = result[0]?.get("clustersCreated") || 0;
+
+ logger.info(`Created ${clustersCreated} new cluster metadata nodes`);
+
+ // Only generate names for new clusters (those with needsNaming = true)
+ if (clustersCreated > 0) {
+ await this.generateClusterNames(userId);
+ }
+ }
+
+ /**
+ * Calculate TF-IDF scores for a specific cluster
+ *
+ * Uses cluster-based document frequency (not statement-based) for optimal cluster naming:
+ * - TF: How often a term appears within this specific cluster
+ * - DF: How many clusters (not statements) contain this term
+ * - IDF: log(total_clusters / clusters_containing_term)
+ *
+ * This approach identifies terms that are frequent in THIS cluster but rare across OTHER clusters,
+ * making them highly distinctive for cluster naming and differentiation.
+ *
+ * Example: "SOL" appears in 100/100 statements in Cluster A, but only 1/10 total clusters
+ * - Cluster-based IDF: log(10/1) = high distinctiveness ✓ (good for naming)
+ * - Statement-based IDF: log(1000/100) = lower distinctiveness (less useful for naming)
+ */
+ private async calculateClusterTFIDFForCluster(
+ userId: string,
+ targetClusterId: string,
+ ): Promise<{
+ subjects: Array<{ term: string; score: number }>;
+ predicates: Array<{ term: string; score: number }>;
+ objects: Array<{ term: string; score: number }>;
+ } | null> {
+ // Get all clusters and their entity frequencies (needed for cluster-based IDF calculation)
+ // We need ALL clusters to calculate how rare each term is across the cluster space
+ const allClustersQuery = `
+ MATCH (s:Statement)
+ WHERE s.userId = $userId AND s.clusterId IS NOT NULL
+ MATCH (s)-[:HAS_SUBJECT]->(subj:Entity)
+ MATCH (s)-[:HAS_PREDICATE]->(pred:Entity)
+ MATCH (s)-[:HAS_OBJECT]->(obj:Entity)
+ WITH s.clusterId as clusterId,
+ collect(DISTINCT subj.name) as subjects,
+ collect(DISTINCT pred.name) as predicates,
+ collect(DISTINCT obj.name) as objects
+ RETURN clusterId, subjects, predicates, objects
+ `;
+
+ const allClusters = await runQuery(allClustersQuery, {
+ userId,
+ });
+
+ // Build document frequency maps from all clusters
+ // DF = number of clusters that contain each term (not number of statements)
+    const subjectDF = new Map<string, number>();
+    const predicateDF = new Map<string, number>();
+    const objectDF = new Map<string, number>();
+ const totalClusters = allClusters.length;
+
+ // Calculate cluster-based document frequencies
+ // For each term, count how many different clusters it appears in
+ for (const record of allClusters) {
+ const subjects = (record.get("subjects") as string[]) || [];
+ const predicates = (record.get("predicates") as string[]) || [];
+ const objects = (record.get("objects") as string[]) || [];
+
+ // Count unique terms per cluster (each cluster contributes max 1 to DF for each term)
+ new Set(subjects).forEach((term) => {
+ subjectDF.set(term, (subjectDF.get(term) || 0) + 1);
+ });
+ new Set(predicates).forEach((term) => {
+ predicateDF.set(term, (predicateDF.get(term) || 0) + 1);
+ });
+ new Set(objects).forEach((term) => {
+ objectDF.set(term, (objectDF.get(term) || 0) + 1);
+ });
+ }
+
+ // Find the target cluster data for TF calculation
+ const targetCluster = allClusters.find(
+ (record) => record.get("clusterId") === targetClusterId,
+ );
+
+ if (!targetCluster) {
+ return null;
+ }
+
+ const subjects = (targetCluster.get("subjects") as string[]) || [];
+ const predicates = (targetCluster.get("predicates") as string[]) || [];
+ const objects = (targetCluster.get("objects") as string[]) || [];
+
+ // Calculate term frequencies within this specific cluster
+ // TF = how often each term appears in this cluster's statements
+    const subjectTF = new Map<string, number>();
+    const predicateTF = new Map<string, number>();
+    const objectTF = new Map<string, number>();
+
+ subjects.forEach((term) => {
+ subjectTF.set(term, (subjectTF.get(term) || 0) + 1);
+ });
+ predicates.forEach((term) => {
+ predicateTF.set(term, (predicateTF.get(term) || 0) + 1);
+ });
+ objects.forEach((term) => {
+ objectTF.set(term, (objectTF.get(term) || 0) + 1);
+ });
+
+ // Calculate TF-IDF scores using cluster-based document frequency
+ // Higher scores = terms frequent in THIS cluster but rare across OTHER clusters
+ const calculateTFIDF = (
+      tf: Map<string, number>,
+      df: Map<string, number>,
+ totalTerms: number,
+ ) => {
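+      // Worked example: a term with TF 5/50 = 0.1 that appears in 2 of 10
+      // clusters gets IDF = ln(10/2) ≈ 1.61, so TF-IDF ≈ 0.16.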
+ return Array.from(tf.entries())
+ .map(([term, freq]) => {
+ // TF: Normalized frequency within this cluster
+ const termFreq = freq / totalTerms;
+
+ // DF: Number of clusters containing this term
+ const docFreq = df.get(term) || 1;
+
+ // IDF: Inverse document frequency (cluster-based)
+ // Higher when term appears in fewer clusters
+ const idf = Math.log(totalClusters / docFreq);
+
+ // TF-IDF: Final distinctiveness score
+ const tfidf = termFreq * idf;
+
+ return { term, score: tfidf };
+ })
+ .sort((a, b) => b.score - a.score)
+ .slice(0, 10); // Top 10 most distinctive terms
+ };
+
+ return {
+ subjects: calculateTFIDF(subjectTF, subjectDF, subjects.length),
+ predicates: calculateTFIDF(predicateTF, predicateDF, predicates.length),
+ objects: calculateTFIDF(objectTF, objectDF, objects.length),
+ };
+ }
+
+ /**
+ * Generate cluster names using LLM based on TF-IDF analysis
+ */
+  private async generateClusterNames(userId: string): Promise<void> {
+ logger.info("Generating cluster names using TF-IDF analysis");
+
+ const getClustersQuery = `
+ MATCH (c:Cluster)
+ WHERE c.userId = $userId AND c.needsNaming = true
+ RETURN c.uuid as clusterId, c.size as size
+ `;
+
+ const clusters = await runQuery(getClustersQuery, { userId });
+
+ for (const record of clusters) {
+ const clusterId = record.get("clusterId");
+ const size = record.get("size");
+
+ // Calculate TF-IDF only for this specific cluster
+ const tfidfData = await this.calculateClusterTFIDFForCluster(
+ userId,
+ clusterId,
+ );
+ if (!tfidfData) {
+ logger.warn(`No TF-IDF data found for cluster ${clusterId}`);
+ continue;
+ }
+
+ const namingPrompt = this.createTFIDFClusterNamingPrompt({
+ ...tfidfData,
+ size,
+ });
+
+ let responseText = "";
+ await makeModelCall(false, namingPrompt, (text) => {
+ responseText = text;
+ });
+
+ try {
+ const outputMatch = responseText.match(/