mirror of
https://github.com/eliasstepanik/core.git
synced 2026-01-11 22:08:27 +00:00
feat: implement PostHog metrics tracking for search and ingest operations
- Add server-side PostHog service for backend analytics
- Track search operations with query details and result metrics
- Track ingest operations at queue, start, and completion stages
- Add unit tests for PostHog service
- Include detailed metadata for better analytics insights
This commit is contained in:
parent
7ac663b7ff
commit
45f44193ec
@ -8,6 +8,7 @@ import { EpisodeType } from "@core/types";
|
||||
import { prisma } from "~/db.server";
|
||||
import { IngestionStatus } from "@core/database";
|
||||
import { logger } from "~/services/logger.service";
|
||||
import { posthogService } from "~/services/posthog.server";
|
||||
|
||||
const connection = new IORedis({
|
||||
port: env.REDIS_PORT,
|
||||
@ -20,6 +21,12 @@ const userQueues = new Map<string, Queue>();
|
||||
const userWorkers = new Map<string, Worker>();
|
||||
|
||||
async function processUserJob(userId: string, job: any) {
|
||||
const startTime = Date.now();
|
||||
const episodeLength = job.data.body.episodeBody?.length || 0;
|
||||
const metadata = job.data.body.metadata || {};
|
||||
const source = job.data.body.source;
|
||||
const spaceId = job.data.body.spaceId;
|
||||
|
||||
try {
|
||||
logger.log(`Processing job for user ${userId}`);
|
||||
|
||||
@ -30,6 +37,15 @@ async function processUserJob(userId: string, job: any) {
|
||||
},
|
||||
});
|
||||
|
||||
// Track ingestion start in PostHog
|
||||
posthogService.capture("ingestion_started", userId, {
|
||||
queue_id: job.data.queueId,
|
||||
episode_length: episodeLength,
|
||||
source,
|
||||
space_id: spaceId,
|
||||
...metadata
|
||||
}).catch(error => logger.error("Failed to track ingestion start", { error }));
|
||||
|
||||
const knowledgeGraphService = new KnowledgeGraphService();
|
||||
|
||||
const episodeDetails = await knowledgeGraphService.addEpisode({
|
||||
@ -45,7 +61,18 @@ async function processUserJob(userId: string, job: any) {
|
||||
},
|
||||
});
|
||||
|
||||
// your processing logic
|
||||
// Track successful ingestion in PostHog
|
||||
const processingTime = Date.now() - startTime;
|
||||
posthogService.trackIngestion(userId, episodeLength, {
|
||||
queue_id: job.data.queueId,
|
||||
processing_time_ms: processingTime,
|
||||
source,
|
||||
space_id: spaceId,
|
||||
entity_count: episodeDetails?.entities?.length || 0,
|
||||
statement_count: episodeDetails?.statements?.length || 0,
|
||||
...metadata
|
||||
}, true).catch(error => logger.error("Failed to track ingestion completion", { error }));
|
||||
|
||||
} catch (err: any) {
|
||||
await prisma.ingestionQueue.update({
|
||||
where: { id: job.data.queueId },
|
||||
@ -55,6 +82,17 @@ async function processUserJob(userId: string, job: any) {
|
||||
},
|
||||
});
|
||||
|
||||
// Track failed ingestion in PostHog
|
||||
const processingTime = Date.now() - startTime;
|
||||
posthogService.trackIngestion(userId, episodeLength, {
|
||||
queue_id: job.data.queueId,
|
||||
processing_time_ms: processingTime,
|
||||
error: err.message,
|
||||
source,
|
||||
space_id: spaceId,
|
||||
...metadata
|
||||
}, false).catch(error => logger.error("Failed to track ingestion failure", { error }));
|
||||
|
||||
console.error(`Error processing job for user ${userId}:`, err);
|
||||
}
|
||||
}
|
||||
@ -128,6 +166,16 @@ export const addToQueue = async (
|
||||
},
|
||||
);
|
||||
|
||||
// Track ingestion queue event in PostHog
|
||||
posthogService.capture("ingestion_queued", userId, {
|
||||
queue_id: queuePersist.id,
|
||||
episode_length: body.episodeBody?.length || 0,
|
||||
source: body.source,
|
||||
space_id: body.spaceId,
|
||||
metadata: body.metadata || {},
|
||||
timestamp: new Date().toISOString(),
|
||||
}).catch(error => logger.error("Failed to track ingestion queue event", { error }));
|
||||
|
||||
return {
|
||||
id: jobDetails.id,
|
||||
};
|
||||
|
||||
148
apps/webapp/app/services/posthog.server.test.ts
Normal file
148
apps/webapp/app/services/posthog.server.test.ts
Normal file
@ -0,0 +1,148 @@
|
||||
import { posthogService } from './posthog.server';
|
||||
import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import fetch from 'node-fetch';
|
||||
|
||||
// Mock node-fetch
|
||||
vi.mock('node-fetch');
|
||||
|
||||
// Mock environment variables
|
||||
vi.mock('~/env.server', () => ({
|
||||
env: {
|
||||
POSTHOG_PROJECT_KEY: 'test-api-key',
|
||||
},
|
||||
}));
|
||||
|
||||
// Mock logger
|
||||
vi.mock('./logger.service', () => ({
|
||||
logger: {
|
||||
debug: vi.fn(),
|
||||
info: vi.fn(),
|
||||
warn: vi.fn(),
|
||||
error: vi.fn(),
|
||||
log: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
describe('PostHogService', () => {
|
||||
beforeEach(() => {
|
||||
vi.resetAllMocks();
|
||||
|
||||
// Default successful response
|
||||
(fetch as unknown as jest.Mock).mockResolvedValue({
|
||||
ok: true,
|
||||
status: 200,
|
||||
statusText: 'OK',
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
it('should capture events with the correct payload structure', async () => {
|
||||
const userId = 'test-user-id';
|
||||
const event = 'test-event';
|
||||
const properties = { test: 'property' };
|
||||
|
||||
await posthogService.capture(event, userId, properties);
|
||||
|
||||
expect(fetch).toHaveBeenCalledTimes(1);
|
||||
expect(fetch).toHaveBeenCalledWith('https://eu.posthog.com/capture/', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': 'Bearer test-api-key',
|
||||
},
|
||||
body: expect.stringContaining(event),
|
||||
});
|
||||
|
||||
const callPayload = JSON.parse((fetch as unknown as jest.Mock).mock.calls[0][1].body);
|
||||
expect(callPayload.api_key).toBe('test-api-key');
|
||||
expect(callPayload.batch).toHaveLength(1);
|
||||
expect(callPayload.batch[0].event).toBe(event);
|
||||
expect(callPayload.batch[0].distinctId).toBe(userId);
|
||||
expect(callPayload.batch[0].properties).toMatchObject({
|
||||
...properties,
|
||||
$lib: 'server',
|
||||
$lib_version: '1.0.0',
|
||||
});
|
||||
});
|
||||
|
||||
it('should track search events with appropriate properties', async () => {
|
||||
const userId = 'test-user-id';
|
||||
const query = 'test search query';
|
||||
const options = { limit: 10 };
|
||||
const resultCounts = { result_count_total: 5 };
|
||||
|
||||
await posthogService.trackSearch(userId, query, options, resultCounts);
|
||||
|
||||
expect(fetch).toHaveBeenCalledTimes(1);
|
||||
|
||||
const callPayload = JSON.parse((fetch as unknown as jest.Mock).mock.calls[0][1].body);
|
||||
expect(callPayload.batch[0].event).toBe('search');
|
||||
expect(callPayload.batch[0].distinctId).toBe(userId);
|
||||
expect(callPayload.batch[0].properties).toMatchObject({
|
||||
query,
|
||||
query_length: query.length,
|
||||
limit: 10,
|
||||
result_count_total: 5,
|
||||
});
|
||||
});
|
||||
|
||||
it('should track ingestion events with appropriate properties', async () => {
|
||||
const userId = 'test-user-id';
|
||||
const episodeLength = 1000;
|
||||
const metadata = { source: 'test-source' };
|
||||
const success = true;
|
||||
|
||||
await posthogService.trackIngestion(userId, episodeLength, metadata, success);
|
||||
|
||||
expect(fetch).toHaveBeenCalledTimes(1);
|
||||
|
||||
const callPayload = JSON.parse((fetch as unknown as jest.Mock).mock.calls[0][1].body);
|
||||
expect(callPayload.batch[0].event).toBe('ingestion');
|
||||
expect(callPayload.batch[0].distinctId).toBe(userId);
|
||||
expect(callPayload.batch[0].properties).toMatchObject({
|
||||
episode_length: 1000,
|
||||
source: 'test-source',
|
||||
success: true,
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle fetch errors gracefully', async () => {
|
||||
(fetch as unknown as jest.Mock).mockRejectedValue(new Error('Network error'));
|
||||
|
||||
const result = await posthogService.capture('test-event', 'test-user-id');
|
||||
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
|
||||
it('should handle API errors gracefully', async () => {
|
||||
(fetch as unknown as jest.Mock).mockResolvedValue({
|
||||
ok: false,
|
||||
status: 500,
|
||||
statusText: 'Internal Server Error',
|
||||
});
|
||||
|
||||
const result = await posthogService.capture('test-event', 'test-user-id');
|
||||
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
|
||||
it('should not send events if no API key is provided', async () => {
|
||||
// Override env mock for this test
|
||||
vi.mock('~/env.server', () => ({
|
||||
env: {
|
||||
POSTHOG_PROJECT_KEY: '',
|
||||
},
|
||||
}), { virtual: true });
|
||||
|
||||
// Need to recreate the service to pick up the new env mock
|
||||
const mockService = new (posthogService.constructor as any)();
|
||||
|
||||
const result = await mockService.capture('test-event', 'test-user-id');
|
||||
|
||||
expect(result).toBe(false);
|
||||
expect(fetch).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
136
apps/webapp/app/services/posthog.server.ts
Normal file
136
apps/webapp/app/services/posthog.server.ts
Normal file
@ -0,0 +1,136 @@
|
||||
import { env } from "~/env.server";
|
||||
import { logger } from "./logger.service";
|
||||
import fetch from "node-fetch";
|
||||
|
||||
interface PostHogEvent {
|
||||
event: string;
|
||||
distinctId: string;
|
||||
properties?: Record<string, any>;
|
||||
timestamp?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Server-side PostHog client for analytics tracking
|
||||
* Provides methods to track events on the server without requiring the client-side JS
|
||||
*/
|
||||
export class PostHogService {
|
||||
private readonly apiKey: string;
|
||||
private readonly host: string;
|
||||
private readonly enabled: boolean;
|
||||
|
||||
constructor() {
|
||||
this.apiKey = env.POSTHOG_PROJECT_KEY;
|
||||
this.host = "https://eu.posthog.com";
|
||||
this.enabled = !!this.apiKey && this.apiKey.length > 0;
|
||||
|
||||
if (!this.enabled) {
|
||||
logger.warn("PostHog tracking is disabled. Set POSTHOG_PROJECT_KEY to enable.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Capture an event in PostHog
|
||||
* @param event Event name
|
||||
* @param distinctId User ID for identification
|
||||
* @param properties Additional properties to track
|
||||
* @returns Promise resolving to true if successful
|
||||
*/
|
||||
public async capture(
|
||||
event: string,
|
||||
distinctId: string,
|
||||
properties: Record<string, any> = {}
|
||||
): Promise<boolean> {
|
||||
if (!this.enabled) return false;
|
||||
if (!distinctId) {
|
||||
logger.warn("PostHog event capture failed: No distinctId provided");
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
const eventData: PostHogEvent = {
|
||||
event,
|
||||
distinctId,
|
||||
properties: {
|
||||
...properties,
|
||||
$lib: "server",
|
||||
$lib_version: "1.0.0",
|
||||
},
|
||||
timestamp: new Date().toISOString(),
|
||||
};
|
||||
|
||||
const response = await fetch(`${this.host}/capture/`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
},
|
||||
body: JSON.stringify({
|
||||
api_key: this.apiKey,
|
||||
batch: [eventData],
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
logger.error(`PostHog capture failed: ${response.status} ${response.statusText}`);
|
||||
return false;
|
||||
}
|
||||
|
||||
logger.debug(`PostHog event captured: ${event}`, {
|
||||
distinctId,
|
||||
eventName: event
|
||||
});
|
||||
return true;
|
||||
} catch (error) {
|
||||
logger.error("Error sending event to PostHog", { error });
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Track search event in PostHog
|
||||
* @param userId User ID
|
||||
* @param query Search query
|
||||
* @param options Search options
|
||||
* @param resultCounts Result counts
|
||||
* @returns Promise resolving to true if successful
|
||||
*/
|
||||
public async trackSearch(
|
||||
userId: string,
|
||||
query: string,
|
||||
options: Record<string, any> = {},
|
||||
resultCounts: Record<string, number> = {}
|
||||
): Promise<boolean> {
|
||||
return this.capture("search", userId, {
|
||||
query,
|
||||
query_length: query.length,
|
||||
...options,
|
||||
...resultCounts,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Track ingestion event in PostHog
|
||||
* @param userId User ID
|
||||
* @param episodeLength Length of ingested content
|
||||
* @param metadata Additional metadata
|
||||
* @param success Whether ingestion succeeded
|
||||
* @returns Promise resolving to true if successful
|
||||
*/
|
||||
public async trackIngestion(
|
||||
userId: string,
|
||||
episodeLength: number,
|
||||
metadata: Record<string, any> = {},
|
||||
success: boolean = true
|
||||
): Promise<boolean> {
|
||||
return this.capture("ingestion", userId, {
|
||||
episode_length: episodeLength,
|
||||
success,
|
||||
...metadata,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Singleton instance for use across the application
|
||||
export const posthogService = new PostHogService();
|
||||
@ -2,6 +2,7 @@ import { openai } from "@ai-sdk/openai";
|
||||
import type { StatementNode } from "@core/types";
|
||||
import { embed } from "ai";
|
||||
import { logger } from "./logger.service";
|
||||
import { posthogService } from "./posthog.server";
|
||||
import { applyCrossEncoderReranking, applyWeightedRRF } from "./search/rerank";
|
||||
import {
|
||||
getEpisodesByStatements,
|
||||
@ -76,10 +77,37 @@ export class SearchService {
|
||||
|
||||
// 3. Return top results
|
||||
const episodes = await getEpisodesByStatements(filteredResults);
|
||||
return {
|
||||
const results = {
|
||||
episodes: episodes.map((episode) => episode.content),
|
||||
facts: filteredResults.map((statement) => statement.fact),
|
||||
};
|
||||
|
||||
// Track search metrics in PostHog
|
||||
posthogService.trackSearch(userId, query,
|
||||
{
|
||||
limit: opts.limit,
|
||||
max_bfs_depth: opts.maxBfsDepth,
|
||||
valid_at: opts.validAt.toISOString(),
|
||||
include_invalidated: opts.includeInvalidated,
|
||||
has_entity_filters: opts.entityTypes.length > 0,
|
||||
has_predicate_filters: opts.predicateTypes.length > 0,
|
||||
score_threshold: opts.scoreThreshold,
|
||||
min_results: opts.minResults,
|
||||
time_range: opts.startTime && opts.endTime ?
|
||||
(new Date(opts.endTime).getTime() - new Date(opts.startTime || 0).getTime()) / (1000 * 60 * 60 * 24) : null
|
||||
},
|
||||
{
|
||||
result_count_total: results.episodes.length + results.facts.length,
|
||||
result_count_episodes: results.episodes.length,
|
||||
result_count_facts: results.facts.length,
|
||||
result_count_bm25: bm25Results.length,
|
||||
result_count_vector: vectorResults.length,
|
||||
result_count_bfs: bfsResults.length,
|
||||
result_count_after_filtering: filteredResults.length
|
||||
}
|
||||
).catch(error => logger.error("Failed to track search metrics", { error }));
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user