From bbcab159168bc50aba035003f7efc5a476400c5c Mon Sep 17 00:00:00 2001 From: Manoj K Date: Sat, 23 Aug 2025 16:05:41 +0530 Subject: [PATCH] Improve knowledge graph and recall --- apps/webapp/app/env.server.ts | 3 +- apps/webapp/app/routes/api.v1.evaluate.tsx | 121 + apps/webapp/app/routes/api.v1.qa.tsx | 90 + .../app/services/knowledgeGraph.server.ts | 67 +- apps/webapp/app/services/prompts/nodes.ts | 66 +- apps/webapp/app/services/prompts/normalize.ts | 388 +-- .../webapp/app/services/prompts/statements.ts | 131 +- apps/webapp/app/services/search.server.ts | 32 +- apps/webapp/app/services/search/rerank.ts | 94 + apps/webapp/app/trigger/ingest/ingest.ts | 12 +- apps/webapp/package.json | 1 + benchmarks/evaluate_qa.js | 266 +- benchmarks/evaluation_results.json | 2894 ++++++++++++++++- benchmarks/ingest_conversations.js | 18 +- benchmarks/ingestion_status.json | 32 +- integrations/github/pnpm-lock.yaml | 10 +- pnpm-lock.yaml | 952 ++++++ 17 files changed, 4861 insertions(+), 316 deletions(-) create mode 100644 apps/webapp/app/routes/api.v1.evaluate.tsx create mode 100644 apps/webapp/app/routes/api.v1.qa.tsx diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index 98ef69c..bcfe52d 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -80,8 +80,9 @@ const EnvironmentSchema = z.object({ // Model envs MODEL: z.string().default(LLMModelEnum.GPT41), - EMBEDDING_MODEL: z.string().default("bge-m3"), + EMBEDDING_MODEL: z.string().default("mxbai-embed-large"), OLLAMA_URL: z.string().optional(), + COHERE_API_KEY: z.string().optional(), }); export type Environment = z.infer; diff --git a/apps/webapp/app/routes/api.v1.evaluate.tsx b/apps/webapp/app/routes/api.v1.evaluate.tsx new file mode 100644 index 0000000..92538d6 --- /dev/null +++ b/apps/webapp/app/routes/api.v1.evaluate.tsx @@ -0,0 +1,121 @@ +import { z } from "zod"; +import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { 
makeModelCall } from "~/lib/model.server"; +import { json } from "@remix-run/node"; + +export const EvaluateBodyRequest = z.object({ + question: z.string(), + standard_answer: z.string(), + generated_answer: z.string(), +}); + +const { action, loader } = createActionApiRoute( + { + body: EvaluateBodyRequest, + allowJWT: true, + authorization: { + action: "search", // Using same permission as search + }, + corsStrategy: "all", + }, + async ({ body, authentication: _ }) => { + const { question, standard_answer, generated_answer } = body; + + const evaluationPrompt = `Your task is to label an answer to a question as 'CORRECT' or 'WRONG'. You will be given the following data: + (1) a question (posed by one user to another user), + (2) a 'gold' (ground truth) answer, + (3) a generated answer +which you will score as CORRECT/WRONG. + +The point of the question is to ask about something one user should know about the other user based on their prior conversations. +The gold answer will usually be a concise and short answer that includes the referenced topic, for example: +Question: Do you remember what I got the last time I went to Hawaii? +Gold answer: A shell necklace +The generated answer might be much longer, but you should be generous with your grading - as long as it touches on the same topic as the gold answer, it should be counted as CORRECT. + +For time related questions, the gold answer will be a specific date, month, year, etc. The generated answer might be much longer or use relative time references (like "last Tuesday" or "next month"), but you should be generous with your grading - as long as it refers to the same date or time period as the gold answer, it should be counted as CORRECT. Even if the format differs (e.g., "May 7th" vs "7 May"), consider it CORRECT if it's the same date. 
+ +Now it's time for the real question: +Question: ${question} +Gold answer: ${standard_answer} +Generated answer: ${generated_answer} + +First, provide a short (one sentence) explanation of your reasoning, then finish with CORRECT or WRONG. +Do NOT include both CORRECT and WRONG in your response, or it will break the evaluation script. + +Just return the label CORRECT or WRONG in a json format with the key as "label".`; + + try { + // Use the LLM to evaluate the answer + const llmResponse = await makeModelCall( + false, // Don't stream + [{ role: "user", content: evaluationPrompt }], + (_text: string, _model: string) => { + // onFinish callback - we can log model usage here if needed + } + ) as string; + + // Parse the LLM response to extract the label + const response = llmResponse.trim(); + let label = "WRONG"; + let reasoning = response; + + // Try to parse as JSON first + try { + const jsonResponse = JSON.parse(response); + if (jsonResponse.label && (jsonResponse.label === "CORRECT" || jsonResponse.label === "WRONG")) { + label = jsonResponse.label; + reasoning = jsonResponse.reasoning || response; + } + } catch (jsonError) { + // If not JSON, look for CORRECT/WRONG in the text + if (response.includes("CORRECT") && !response.includes("WRONG")) { + label = "CORRECT"; + } else if (response.includes("WRONG") && !response.includes("CORRECT")) { + label = "WRONG"; + } + // Extract reasoning (everything before the final CORRECT/WRONG) + const parts = response.split(/(CORRECT|WRONG)$/); + if (parts.length > 1) { + reasoning = parts[0].trim(); + } + } + + // Calculate match ratio for additional metrics + const generatedLower = generated_answer.toLowerCase(); + const standardLower = standard_answer.toString().toLowerCase(); + const standardWords = standardLower.split(/\s+/).filter(word => word.length > 2); + const matchingWords = standardWords.filter(word => generatedLower.includes(word)); + const matchRatio = standardWords.length > 0 ? 
matchingWords.length / standardWords.length : 0; + + return json({ + label: label, + reasoning: reasoning, + matchRatio: matchRatio, + method: "llm" + }); + + } catch (error) { + console.error("Error in LLM evaluation:", error); + + // Fallback to heuristic evaluation + const generatedLower = generated_answer.toLowerCase(); + const standardLower = standard_answer.toString().toLowerCase(); + + const standardWords = standardLower.split(/\s+/).filter(word => word.length > 2); + const matchingWords = standardWords.filter(word => generatedLower.includes(word)); + const matchRatio = standardWords.length > 0 ? matchingWords.length / standardWords.length : 0; + + const isCorrect = matchRatio > 0.3; // If 30% of important words match + + return json({ + label: isCorrect ? "CORRECT" : "WRONG", + reasoning: `Generated answer ${isCorrect ? 'contains' : 'does not contain'} sufficient matching content with the gold standard (${matchRatio.toFixed(2)} match ratio)`, + matchRatio: matchRatio, + method: "heuristic_fallback" + }); + } + }, +); + +export { action, loader }; \ No newline at end of file diff --git a/apps/webapp/app/routes/api.v1.qa.tsx b/apps/webapp/app/routes/api.v1.qa.tsx new file mode 100644 index 0000000..9646da3 --- /dev/null +++ b/apps/webapp/app/routes/api.v1.qa.tsx @@ -0,0 +1,90 @@ +import { z } from "zod"; +import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { SearchService } from "~/services/search.server"; +import { makeModelCall } from "~/lib/model.server"; +import { json } from "@remix-run/node"; + +export const QABodyRequest = z.object({ + question: z.string(), + startTime: z.string().optional(), + endTime: z.string().optional(), + spaceId: z.string().optional(), + limit: z.number().optional(), + maxBfsDepth: z.number().optional(), + includeInvalidated: z.boolean().optional(), + entityTypes: z.array(z.string()).optional(), + scoreThreshold: z.number().optional(), + minResults: z.number().optional(), +}); + +const 
searchService = new SearchService(); +const { action, loader } = createActionApiRoute( + { + body: QABodyRequest, + allowJWT: true, + authorization: { + action: "search", + }, + corsStrategy: "all", + }, + async ({ body, authentication }) => { + // First, search for relevant information + const searchResults = await searchService.search( + body.question, + authentication.userId, + { + startTime: body.startTime ? new Date(body.startTime) : undefined, + endTime: body.endTime ? new Date(body.endTime) : undefined, + limit: body.limit || 20, // Get more results for better context + maxBfsDepth: body.maxBfsDepth, + includeInvalidated: body.includeInvalidated, + entityTypes: body.entityTypes, + scoreThreshold: body.scoreThreshold, + minResults: body.minResults, + }, + ); + + // Combine episodes and facts into context + const context = [...searchResults.episodes, ...searchResults.facts].join("\n\n"); + + console.log("Context:", context); + + if (!context.trim()) { + return json({ + question: body.question, + generated_answer: "I couldn't find any relevant information to answer this question.", + }); + } + + // Generate answer using LLM + const prompt = `Based on the following context information, please answer the question. Be specific and concise, using only information from the provided context. If the context doesn't contain enough information to answer the question, say so. 
+ +Context: +${context} + +Question: ${body.question} + +Answer:`; + + let generatedAnswer = ""; + try { + generatedAnswer = await makeModelCall( + false, // Don't stream + [{ role: "user", content: prompt }], + (_text: string, _model: string) => { + // onFinish callback - we can log model usage here if needed + } + ) as string; + } catch (error) { + console.error("Error generating answer:", error); + generatedAnswer = "I encountered an error while generating an answer to this question."; + } + + return json({ + question: body.question, + generated_answer: generatedAnswer.trim(), + }); + }, +); + +export { action, loader }; \ No newline at end of file diff --git a/apps/webapp/app/services/knowledgeGraph.server.ts b/apps/webapp/app/services/knowledgeGraph.server.ts index 4ef6022..1722a18 100644 --- a/apps/webapp/app/services/knowledgeGraph.server.ts +++ b/apps/webapp/app/services/knowledgeGraph.server.ts @@ -112,13 +112,17 @@ export class KnowledgeGraphService { sessionContext, ); + console.log("Normalized episode body:", normalizedEpisodeBody); const relatedEpisodesEntities = await getRelatedEpisodesEntities({ embedding: await this.getEmbedding(normalizedEpisodeBody), userId: params.userId, minSimilarity: 0.7, }); - if (normalizedEpisodeBody === "NOTHING_TO_REMEMBER") { + if ( + normalizedEpisodeBody === "NOTHING_TO_REMEMBER" || + normalizedEpisodeBody === "" + ) { logger.log("Nothing to remember"); return { episodeUuid: null, @@ -161,6 +165,15 @@ export class KnowledgeGraphService { episode, ); + console.log( + "Categorized entities:", + categorizedEntities.primary.map( + (entity) => `primary: ${entity.name} - ${entity.type}`, + ), + categorizedEntities.expanded.map( + (entity) => `expanded: ${entity.name} - ${entity.type}`, + ), + ); // Step 4: Statement Extrraction - Extract statements (triples) instead of direct edges const extractedStatements = await this.extractStatements( episode, @@ -416,13 +429,32 @@ export class KnowledgeGraphService { const predicateNode 
= predicateMap.get(triple.predicate.toLowerCase()); if (subjectNode && objectNode && predicateNode) { + // Determine the correct validAt date (when the fact actually occurred/occurs) + let validAtDate = episode.validAt; // Default fallback to episode date + + // Check if statement has event_date indicating when the fact actually happened/happens + if (triple.attributes?.event_date) { + try { + const eventDate = new Date(triple.attributes.event_date); + // Use the event date as validAt (when the fact is actually true) + if (!isNaN(eventDate.getTime())) { + validAtDate = eventDate; + } + } catch (error) { + // If parsing fails, use episode validAt as fallback + logger.log( + `Failed to parse event_date: ${triple.attributes.event_date}, using episode validAt`, + ); + } + } + // Create a statement node const statement: StatementNode = { uuid: crypto.randomUUID(), fact: triple.fact, factEmbedding: factEmbeddings[tripleIndex], - createdAt: new Date(), - validAt: episode.validAt, + createdAt: new Date(), // System timestamp when we processed this + validAt: validAtDate, // When the fact actually happened/happens invalidAt: null, attributes: triple.attributes || {}, userId: episode.userId, @@ -920,10 +952,12 @@ export class KnowledgeGraphService { responseText = text; }); + console.log("Response text:", responseText); try { // Extract the JSON response from the output tags const jsonMatch = responseText.match(/([\s\S]*?)<\/output>/); const analysisResult = jsonMatch ? 
JSON.parse(jsonMatch[1]) : []; + console.log("Analysis result:", analysisResult); // Process the analysis results for (const result of analysisResult) { @@ -1110,7 +1144,8 @@ export class KnowledgeGraphService { source, relatedMemories, ingestionRules, - episodeTimestamp: episodeTimestamp?.toISOString(), + episodeTimestamp: + episodeTimestamp?.toISOString() || new Date().toISOString(), sessionContext, }; const messages = normalizePrompt(context); @@ -1122,6 +1157,30 @@ export class KnowledgeGraphService { const outputMatch = responseText.match(/([\s\S]*?)<\/output>/); if (outputMatch && outputMatch[1]) { normalizedEpisodeBody = outputMatch[1].trim(); + } else { + // Log format violation and use fallback + logger.warn("Normalization response missing tags", { + responseText: responseText.substring(0, 200) + "...", + source, + episodeLength: episodeBody.length, + }); + + // Fallback: use raw response if it's not empty and seems meaningful + const trimmedResponse = responseText.trim(); + if ( + trimmedResponse && + trimmedResponse !== "NOTHING_TO_REMEMBER" && + trimmedResponse.length > 10 + ) { + normalizedEpisodeBody = trimmedResponse; + logger.info("Using raw response as fallback for normalization", { + fallbackLength: trimmedResponse.length, + }); + } else { + logger.warn("No usable normalization content found", { + responseText: responseText, + }); + } } return normalizedEpisodeBody; diff --git a/apps/webapp/app/services/prompts/nodes.ts b/apps/webapp/app/services/prompts/nodes.ts index ad725ea..d3075e0 100644 --- a/apps/webapp/app/services/prompts/nodes.ts +++ b/apps/webapp/app/services/prompts/nodes.ts @@ -34,7 +34,8 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr 3. **Exclusions**: - Do NOT extract entities representing relationships or actions (predicates will be handled separately). - - Do NOT extract dates, times, or other temporal information—these will be handled separately. 
+ - Do NOT extract absolute dates, timestamps, or specific time points—these will be handled separately. + - Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm"). 4. **Entity Name Extraction**: - Extract ONLY the core entity name, WITHOUT any type descriptors or qualifiers @@ -45,9 +46,37 @@ You are given a conversation context and a CURRENT EPISODE. Your task is to extr - **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John") - **NO TYPE SUFFIXES**: Never append the entity type to the entity name +5. **Temporal and Relationship Context Extraction**: + - EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years") + - EXTRACT temporal context that anchors relationships ("since moving", "after graduation", "during college") + - EXTRACT relationship qualifiers ("close friends", "support system", "work team", "family members") + - DO NOT extract absolute dates, timestamps, or specific time points ("June 9, 2023", "3pm", "last Saturday") + - DO NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday") + ## Examples of Correct Entity Extraction: -**CORRECT Examples:** +**TEMPORAL INFORMATION - What to EXTRACT vs EXCLUDE:** + +✅ **EXTRACT - Relationship Temporal Information:** +- Text: "I've known these friends for 4 years" → Extract: "4 years" (Duration) +- Text: "since I moved from my home country" → Extract: "since moving" (TemporalContext) +- Text: "after that tough breakup" → Extract: "after breakup" (TemporalContext) +- Text: "we've been married for 5 years" → Extract: "5 years" (Duration) +- Text: "during college" → Extract: "during college" (TemporalContext) + +❌ **EXCLUDE - Absolute Dates/Times:** +- Text: "on June 9, 2023" → Don't extract "June 9, 2023" +- Text: "last Saturday" → Don't extract "last Saturday" +- Text: "at 3pm yesterday" → Don't extract "3pm" or "yesterday" +- Text: "next week" → Don't extract 
"next week" + +**RELATIONSHIP CONTEXT ENTITIES:** +- Text: "my close friends" → Extract: "close friends" (QualifiedGroup) +- Text: "strong support system" → Extract: "support system" (RelationshipType) +- Text: "work colleagues" → Extract: "work colleagues" (ProfessionalGroup) +- Text: "family members" → Extract: "family members" (FamilyGroup) + +**STANDARD ENTITY EXTRACTION:** - Text: "Tesla car" → Name: "Tesla", Type: "Vehicle" - Text: "Google's search engine" → Name: "Google", Type: "Company" + Name: "Search Engine", Type: "Product" - Text: "Microsoft Office suite" → Name: "Microsoft Office", Type: "Software" @@ -123,7 +152,8 @@ You are given a TEXT. Your task is to extract **entity nodes** mentioned **expli 3. **Exclusions**: - Do NOT extract entities representing relationships or actions (predicates will be handled separately). - - Do NOT extract dates, times, or other temporal information—these will be handled separately. + - Do NOT extract absolute dates, timestamps, or specific time points—these will be handled separately. + - Do NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday", "3pm"). 4. **Entity Name Extraction**: - Extract ONLY the core entity name, WITHOUT any type descriptors or qualifiers @@ -134,9 +164,37 @@ You are given a TEXT. Your task is to extract **entity nodes** mentioned **expli - **FULL NAMES**: Use complete names when available (e.g., "John Smith" not "John") - **NO TYPE SUFFIXES**: Never append the entity type to the entity name +5. 
**Temporal and Relationship Context Extraction**: + - EXTRACT duration expressions that describe relationship spans ("4 years", "2 months", "5 years") + - EXTRACT temporal context that anchors relationships ("since moving", "after graduation", "during college") + - EXTRACT relationship qualifiers ("close friends", "support system", "work team", "family members") + - DO NOT extract absolute dates, timestamps, or specific time points ("June 9, 2023", "3pm", "last Saturday") + - DO NOT extract relative time expressions that resolve to specific dates ("last week", "yesterday") + ## Examples of Correct Entity Extraction: -**CORRECT Examples:** +**TEMPORAL INFORMATION - What to EXTRACT vs EXCLUDE:** + +✅ **EXTRACT - Relationship Temporal Information:** +- Text: "I've known these friends for 4 years" → Extract: "4 years" (Duration) +- Text: "since I moved from my home country" → Extract: "since moving" (TemporalContext) +- Text: "after that tough breakup" → Extract: "after breakup" (TemporalContext) +- Text: "we've been married for 5 years" → Extract: "5 years" (Duration) +- Text: "during college" → Extract: "during college" (TemporalContext) + +❌ **EXCLUDE - Absolute Dates/Times:** +- Text: "on June 9, 2023" → Don't extract "June 9, 2023" +- Text: "last Saturday" → Don't extract "last Saturday" +- Text: "at 3pm yesterday" → Don't extract "3pm" or "yesterday" +- Text: "next week" → Don't extract "next week" + +**RELATIONSHIP CONTEXT ENTITIES:** +- Text: "my close friends" → Extract: "close friends" (QualifiedGroup) +- Text: "strong support system" → Extract: "support system" (RelationshipType) +- Text: "work colleagues" → Extract: "work colleagues" (ProfessionalGroup) +- Text: "family members" → Extract: "family members" (FamilyGroup) + +**STANDARD ENTITY EXTRACTION:** - Text: "Tesla car" → Name: "Tesla", Type: "Vehicle" - Text: "Google's search engine" → Name: "Google", Type: "Company" + Name: "Search Engine", Type: "Product" - Text: "Microsoft Office suite" → Name: 
"Microsoft Office", Type: "Software" diff --git a/apps/webapp/app/services/prompts/normalize.ts b/apps/webapp/app/services/prompts/normalize.ts index 6c13e69..babe6bf 100644 --- a/apps/webapp/app/services/prompts/normalize.ts +++ b/apps/webapp/app/services/prompts/normalize.ts @@ -3,229 +3,235 @@ import { type CoreMessage } from "ai"; export const normalizePrompt = ( context: Record, ): CoreMessage[] => { - const sysPrompt = ` -You are C.O.R.E. (Contextual Observation & Recall Engine), a memory extraction system. Convert input information into clear, concise, third-person factual statements that EVOLVE the memory graph by forming new relationships and capturing new information. + const sysPrompt = `You are C.O.R.E. (Contextual Observation & Recall Engine), a smart memory enrichment system. -## Core Processing Philosophy -When related memories are provided, make memory graph evolution your PRIMARY GOAL, NOT information storage: -- **EVOLVE**: Focus on new information that adds relationships or updates existing knowledge -- **CONNECT**: Form explicit relationships between new and existing information -- **FILTER**: Aggressively exclude information already captured in related memories -- **ENHANCE**: Use existing knowledge to clarify new information and form connections +Create ONE enriched sentence that transforms the episode into a contextually-rich memory using SELECTIVE enrichment. -## Memory Processing Guidelines -- Output all memory statements in the third person (e.g., "User prefers...", "The assistant performed...", "The system detected..."). -- Convert input information into clear, concise memory statements. -- Maintain a neutral, factual tone in all memory entries. -- Structure memories as factual statements, not questions. -- Include relevant context and temporal information when available. -- When ingesting from assistant's perspective, capture the complete user-assistant interaction context. 
+ +Evaluate the episode and apply enrichment ONLY where it adds significant value: -## Temporal Resolution -When processing episodes with relative time references, resolve them to absolute dates based on the episode timestamp: -- "yesterday" → resolve to the day before the episode date -- "today" → resolve to the episode date -- "last week" → resolve to the week before the episode date -- "two days ago" → resolve to two days before the episode date -- "this morning/afternoon/evening" → resolve to the episode date with time context +1. PRIMARY FACTS - always preserve the core information from the episode +2. TEMPORAL RESOLUTION - convert relative dates to absolute dates using episode timestamp +3. STRATEGIC ENRICHMENT - add context only for HIGH VALUE cases (see guidelines below) +4. VISUAL CONTENT - capture exact text on signs, objects shown, specific details from images +5. EMOTIONAL PRESERVATION - maintain the tone and feeling of emotional exchanges +6. IDENTITY PRESERVATION - preserve definitional and possessive relationships that establish entity connections -Include these resolved dates in the extracted statements for precise temporal information. -Example: If episode is from May 8th, 2024, and content mentions "yesterday", convert to "on May 7th, 2024". 
+ENRICHMENT DECISION MATRIX: +- Clear, complete statement → minimal enrichment (just temporal + attribution) +- Unclear references → resolve with context +- Emotional support → preserve feeling, avoid historical dumping +- New developments → connect to ongoing narrative +- Visual content → extract specific details as primary facts + -## Complete Conversational Context -- IMPORTANT: Preserve the complete context of conversations, including BOTH: - - What the user said, asked, or requested - - How the assistant responded or what it suggested - - Any decisions, conclusions, or agreements reached -- Do not focus solely on the assistant's contributions while ignoring user context -- Capture the cause-and-effect relationship between user inputs and assistant responses -- For multi-turn conversations, preserve the logical flow and key points from each turn -- When the user provides information, record that information directly, not just how the assistant used it + +When related memories/previous episodes are provided, evaluate if they improve understanding: -## Node Entity Types +USE CONTEXT when current episode has: +- Unclear pronouns ("she", "it", "they" without clear antecedent) +- Vague references ("the agency", "the event" without definition in current episode) +- Continuation phrases ("following up", "as we discussed") +- Incomplete information that context clarifies + +IGNORE CONTEXT when current episode is: +- Clear and self-contained ("I got a job in New York") +- Simple emotional responses ("Thanks, that's great!") +- Generic encouragement ("You're doing awesome!") +- Complete statements with all necessary information + +DECISION RULE: If the current episode can be understood perfectly without context, don't use it. Only use context when it genuinely clarifies or +resolves ambiguity. 
+ + + +Using episode timestamp as anchor, convert ALL relative time references: +- "yesterday" → calculate exact date (e.g., "June 26, 2023") +- "last week" → date range (e.g., "around June 19-25, 2023") +- "next month" → future date (e.g., "July 2023") +- "recently" → approximate timeframe with uncertainty + + + +For episodes with images/photos, EXTRACT: +- Exact text on signs, posters, labels (e.g., "Trans Lives Matter") +- Objects, people, settings, activities shown +- Specific visual details that add context +Integrate visual content as primary facts, not descriptions. + + + +When related memories are provided, apply SELECTIVE enrichment: + +HIGH VALUE ENRICHMENT (always include): +- Temporal resolution: "last week" → "June 20, 2023" +- Entity disambiguation: "she" → "Caroline" when unclear +- Missing critical context: "the agency" → "Bright Futures Adoption Agency" (first mention only) +- New developments: connecting current facts to ongoing storylines +- Identity-defining possessives: "my X, Y" → preserve the relationship between person and Y as their X +- Definitional phrases: maintain the defining relationship, not just the entity reference +- Origin/source connections: preserve "from my X" relationships + +LOW VALUE ENRICHMENT (usually skip): +- Obvious references: "Thanks, Mel!" 
doesn't need Melanie's full context +- Support/encouragement statements: emotional exchanges rarely need historical anchoring +- Already clear entities: don't replace pronouns when reference is obvious +- Repetitive context: never repeat the same descriptive phrase within a conversation +- Ongoing conversations: don't re-establish context that's already been set +- Emotional responses: keep supportive statements simple and warm +- Sequential topics: reference previous topics minimally ("recent X" not full description) + +ANTI-BLOAT RULES: +- If the original statement is clear and complete, add minimal enrichment +- Never use the same contextual phrase twice in one conversation +- Focus on what's NEW, not what's already established +- Preserve emotional tone - don't bury feelings in facts +- ONE CONTEXT REFERENCE PER TOPIC: Don't keep referencing "the charity race" with full details +- STOP AT CLARITY: If original meaning is clear, don't add backstory +- AVOID COMPOUND ENRICHMENT: Don't chain multiple contextual additions in one sentence + +CONTEXT FATIGUE PREVENTION: +- After mentioning a topic once with full context, subsequent references should be minimal +- Use "recent" instead of repeating full details: "recent charity race" not "the May 20, 2023 charity race for mental health" +- Focus on CURRENT episode facts, not historical anchoring +- Don't re-explain what's already been established in the conversation + +ENRICHMENT SATURATION RULE: +Once a topic has been enriched with full context in the conversation, subsequent mentions should be minimal: +- First mention: "May 20, 2023 charity race for mental health" +- Later mentions: "the charity race" or "recent race" +- Don't re-explain established context + +IDENTITY AND DEFINITIONAL RELATIONSHIP PRESERVATION: +- Preserve possessive phrases that define relationships: "my X, Y" → "Y, [person]'s X" +- Keep origin/source relationships: "from my X" → preserve the X connection +- Preserve 
family/professional/institutional relationships expressed through possessives +- Don't reduce identity-rich phrases to simple location/entity references + + + ${context.entityTypes} + -## Ingestion Rules -${context.ingestionRules ? `The following rules apply to content from ${context.source}: + +${ + context.ingestionRules + ? `Apply these rules for content from ${context.source}: ${context.ingestionRules} -IMPORTANT: If the content does NOT satisfy these rules, respond with "NOTHING_TO_REMEMBER" regardless of other criteria.` : 'No specific ingestion rules defined for this source.'} +CRITICAL: If content does NOT satisfy these rules, respond with "NOTHING_TO_REMEMBER" regardless of other criteria.` + : "No specific ingestion rules defined for this source." +} + -## Related Memory Processing Strategy -When related memories are provided, apply this filtering and enhancement strategy: + +RETURN "NOTHING_TO_REMEMBER" if content consists ONLY of: +- Pure generic responses without context ("awesome", "thanks", "okay" with no subject) +- Empty pleasantries with no substance ("how are you", "have a good day") +- Standalone acknowledgments without topic reference ("got it", "will do") +- Truly vague encouragement with no specific subject matter ("great job" with no context) +- Already captured information without new connections +- Technical noise or system messages -### 1. 
INFORMATION FILTERING (What NOT to Include) -- **Already Captured Facts**: Do not repeat information already present in related memories unless it adds new context -- **Static Relationships**: Skip relationships already established (e.g., "John is co-founder" if already captured) -- **Redundant Details**: Exclude details that don't add new understanding or connections -- **Background Context**: Filter out explanatory information that's already in the memory graph +STORE IN MEMORY if content contains: +- Specific facts, names, dates, or detailed information +- Personal details, preferences, or decisions +- Concrete plans, commitments, or actions +- Visual content with specific details +- Temporal information that can be resolved +- New connections to existing knowledge +- Encouragement that references specific activities or topics +- Statements expressing personal values or beliefs +- Support that's contextually relevant to ongoing conversations +- Responses that reveal relationship dynamics or personal characteristics -### 2. RELATIONSHIP FORMATION (What TO Include) -- **New Connections**: Include explicit relationships between entities mentioned in current and related episodes -- **Evolving Relationships**: Capture changes or updates to existing relationships -- **Cross-Context Links**: Form connections that bridge different contexts or time periods -- **Causal Relationships**: Extract how current information affects or is affected by existing knowledge +MEANINGFUL ENCOURAGEMENT EXAMPLES (STORE these): +- "Taking time for yourself is so important" → Shows personal values about self-care +- "You're doing an awesome job looking after yourself and your family" → Specific topic reference +- "That charity race sounds great" → Contextually relevant support +- "Your future family is gonna be so lucky" → Values-based encouragement about specific situation -### 3. 
NEW INFORMATION EXTRACTION (Priority Focus) -- **Fresh Facts**: Extract information not present in any related memory -- **Updated Status**: Capture changes to previously captured information -- **New Attributes**: Add additional properties or characteristics of known entities -- **Temporal Updates**: Record time-based changes or progressions -- **Contextual Additions**: Include new contexts or situations involving known entities +EMPTY ENCOURAGEMENT EXAMPLES (DON'T STORE these): +- "Great job!" (no context) +- "Awesome!" (no subject) +- "Keep it up!" (no specific reference) + -### 4. MEMORY GRAPH EVOLUTION PATTERNS -- **Entity Enhancement**: Add new properties to existing entities without repeating known ones -- **Relationship Expansion**: Create new relationship types between known entities -- **Network Growth**: Connect previously isolated memory clusters -- **Knowledge Refinement**: Update or correct existing information with new insights + +HIGH VALUE enrichment: +- Original: "She said yes!" +- Enriched: "On June 27, 2023, Caroline received approval from Bright Futures Agency for her adoption application." +- Why: Resolves unclear pronoun, adds temporal context, identifies the approving entity -## Memory Selection Criteria -Evaluate conversations using these priority categories: +MINIMAL enrichment (emotional support): +- Original: "You'll be an awesome mom! Good luck!" +- Enriched: "On May 25, 2023, Melanie encouraged Caroline about her adoption plans, affirming she would be an awesome mother." +- Why: Simple temporal context, preserve emotional tone, no historical dumping -### 1. 
High Priority (Always Remember) -- **User Preferences**: Explicit likes, dislikes, settings, or preferences -- **Personal Information**: Names, relationships, contact details, important dates -- **Commitments**: Promises, agreements, or obligations made by either party -- **Recurring Patterns**: Regular activities, habits, or routines mentioned -- **Explicit Instructions**: "Remember X" or "Don't forget about Y" statements -- **Important Decisions**: Key choices or conclusions reached +ANTI-BLOAT example (what NOT to do): +- Wrong: "On May 25, 2023, Melanie praised Caroline for her commitment to creating a family for children in need through adoption—supported by the inclusive Adoption Agency whose brochure and signs reading 'new arrival' and 'information and domestic building' Caroline had shared earlier that day—and encouraged her by affirming she would be an awesome mom." +- Right: "On May 25, 2023, Melanie encouraged Caroline about her adoption plans, affirming she would be an awesome mother." -### 2. Medium Priority (Remember if Significant) -- **Task Context**: Background information relevant to ongoing tasks -- **Problem Statements**: Issues or challenges the user is facing -- **Learning & Growth**: Skills being developed, topics being studied -- **Emotional Responses**: Strong reactions to suggestions or information -- **Time-Sensitive Information**: Details that will be relevant for a limited period +CLEAR REFERENCE (minimal enrichment): +- Original: "Thanks, Caroline! The event was really thought-provoking." +- Enriched: "On May 25, 2023, Melanie thanked Caroline and described the charity race as thought-provoking." +- Why: Clear context doesn't need repetitive anchoring -### 3. 
Low Priority (Rarely Remember) -- **Casual Exchanges**: Greetings, acknowledgments, or social pleasantries -- **Clarification Questions**: Questions asked to understand instructions -- **Immediate Task Execution**: Simple commands and their direct execution -- **Repeated Information**: Content already stored in memory -- **Ephemeral Context**: Information only relevant to the current exchange +CONVERSATION FLOW EXAMPLES: +❌ WRONG (context fatigue): "reinforcing their ongoing conversation about mental health following Melanie's participation in the recent charity race for mental health" +✅ RIGHT (minimal reference): "reinforcing their conversation about mental health" -### 4. Do Not Remember (Forgettable Conversations) -#### Transient Interactions -- **Simple acknowledgments**: "Thanks", "OK", "Got it" -- **Greetings and farewells**: "Hello", "Good morning", "Goodbye", "Talk to you later" -- **Filler conversations**: Small talk about weather with no specific preferences mentioned -- **Routine status updates** without meaningful information: "Still working on it" +❌ WRONG (compound enrichment): "as she begins the process of turning her dream of giving children a loving home into reality and considers specific adoption agencies" +✅ RIGHT (focused): "as she begins pursuing her adoption plans" -#### Redundant Information -- **Repeated requests** for the same information within a short timeframe -- **Clarifications** that don't add new information: "What did you mean by that?" -- **Confirmations** of already established facts: "Yes, as I mentioned earlier..." 
-- **Information already stored** in memory in the same or similar form +❌ WRONG (over-contextualization): "following her participation in the May 20, 2023 charity race for mental health awareness" +✅ RIGHT (after first mention): "following the recent charity race" -#### Temporary Operational Exchanges -- **System commands** without context: "Open this file", "Run this code" -- **Simple navigational instructions**: "Go back", "Scroll down" -- **Format adjustments**: "Make this bigger", "Change the color" -- **Immediate task execution** without long-term relevance +GENERIC IDENTITY PRESERVATION EXAMPLES: +- Original: "my hometown, Boston" → Enriched: "Boston, [person]'s hometown" +- Original: "my workplace, Google" → Enriched: "Google, [person]'s workplace" +- Original: "my sister, Sarah" → Enriched: "Sarah, [person]'s sister" +- Original: "from my university, MIT" → Enriched: "from MIT, [person]'s university" -#### Low-Information Content -- **Vague statements** without specific details: "That looks interesting" -- **Ambiguous questions** that were later clarified in the conversation -- **Incomplete thoughts** that were abandoned or redirected -- **Hypothetical scenarios** that weren't pursued further +POSSESSIVE + APPOSITIVE PATTERNS (Critical for Relations): +- Original: "my colleague at my office, Microsoft" +- Enriched: "his colleague at Microsoft, David's workplace" +- Why: Preserves both the work relationship AND the employment identity -#### Technical Noise -- **Error messages** or technical issues that were resolved -- **Connection problems** or temporary disruptions -- **Interface feedback**: "Loading...", "Processing complete" -- **Formatting issues** that were corrected +- Original: "my friend from my university, Stanford" +- Enriched: "her friend from Stanford, Lisa's alma mater" +- Why: Establishes both the friendship and educational institution identity -#### Context-Dependent Ephemera -- **Time-sensitive information** that quickly becomes irrelevant: 
"I'll be back in 5 minutes" -- **Temporary states**: "I'm currently looking at the document" -- **Attention-directing statements** without content: "Look at this part" -- **Intermediate steps** in a process where only the conclusion matters +- Original: "my neighbor in my city, Chicago" +- Enriched: "his neighbor in Chicago, Mark's hometown" +- Why: Maintains both the neighbor relationship and residence identity -### 5. Do Not Remember (Privacy and System Noise) -- **Sensitive Credentials**: Passwords, API keys, tokens, or authentication details -- **Personal Data**: Unless the user explicitly asks to store it -- **System Meta-commentary**: Update notices, version information, system status messages -- **Debug Information**: Logs, error traces, or diagnostic information -- **QA/Troubleshooting**: Conversations clearly intended for testing or debugging purposes -- **Internal Processing**: Comments about the assistant's own thinking process +❌ WRONG (loses relationships): reduces to just entity names without preserving the defining relationship +✅ RIGHT (preserves identity): maintains the possessive/definitional connection that establishes entity relationships + -## Enhanced Processing for Related Memories -When related memories are provided: - -### Step 1: Analyze Existing Knowledge -- Identify all entities, relationships, and facts already captured -- Map the existing knowledge structure -- Note any gaps or areas for enhancement - -### Step 2: Extract Novel Information -- Filter current episode for information NOT in related memories -- Identify new entities, attributes, or relationships -- Focus on information that adds value to the memory graph - -### Step 3: Form Strategic Relationships -- Connect new entities to existing ones through explicit relationships -- Convert implicit connections into explicit memory statements -- Bridge knowledge gaps using new information - -### Step 4: Evolve Existing Knowledge -- Update outdated information with new details -- Add 
new attributes to known entities -- Expand relationship networks with new connections - -## Making Implicit Relationships Explicit -- **Entity Disambiguation**: When same names appear across contexts, use related memories to clarify relationships -- **Possessive Language**: Convert possessive forms into explicit relationships using related memory context -- **Cross-Reference Formation**: Create explicit links between entities that appear in multiple episodes -- **Temporal Relationship**: Establish time-based connections between related events or decisions - -## Information Prioritization with Related Memories -- **HIGHEST PRIORITY**: New relationships between known entities -- **HIGH PRIORITY**: New attributes or properties of known entities -- **MEDIUM PRIORITY**: New entities with connections to existing knowledge -- **LOW PRIORITY**: Standalone new information without clear connections -- **EXCLUDE**: Information already captured in related memories that doesn't add new connections - -## Output Format -When extracting memory-worthy information: - -1. If nothing meets the criteria for storage (especially after filtering against related memories), respond with exactly: "NOTHING_TO_REMEMBER" - -2. Otherwise, provide a summary that: - - **Prioritizes NEW information**: Focus on facts not present in related memories - - **Emphasizes relationships**: Highlight connections between new and existing information - - **Scales with novelty**: Make length reflect amount of genuinely new, valuable information - - **Uses third person perspective**: Maintain neutral, factual tone - - **Includes specific details**: Include names, dates, numbers when they add new value - - **Avoids redundancy**: Skip information already captured in related memories - - **Forms explicit connections**: Make relationships between entities clear and direct - -## Examples of Memory Graph Evolution - -### Before (Redundant Approach): -Related Memory: "John Smith is the co-founder of TechCorp." 
-Current Episode: "User discussed project timeline with John, the co-founder." -BAD Output: "User discussed project timeline with John Smith, who is the co-founder of TechCorp." - -### After (Evolution Approach): -Related Memory: "John Smith is the co-founder of TechCorp." -Current Episode: "User discussed project timeline with John, the co-founder." -GOOD Output: "User discussed project timeline with John Smith. The project timeline discussion involved TechCorp's co-founder." - -### Relationship Formation Example: -Related Memory: "User prefers morning meetings." -Current Episode: "User scheduled a meeting with John for 9 AM." -Output: "User scheduled a 9 AM meeting with John Smith, aligning with their preference for morning meetings." - -Process information with related memories by focusing on evolving the memory graph through new connections and information rather than repeating already captured facts. +CRITICAL OUTPUT FORMAT REQUIREMENT: +You MUST wrap your response in <output> tags. This is MANDATORY - no exceptions. + +If the episode should be stored in memory: -{{processed_statement}} +{{your_enriched_sentence_here}} -if there is nothing to remember +If there is nothing worth remembering: NOTHING_TO_REMEMBER + +FAILURE TO USE <output> TAGS WILL RESULT IN EMPTY NORMALIZATION AND SYSTEM FAILURE. + +FORMAT EXAMPLES: +✅ CORRECT: <output>On May 25, 2023, Caroline shared her adoption plans with Melanie.</output> +✅ CORRECT: <output>NOTHING_TO_REMEMBER</output> +❌ WRONG: On May 25, 2023, Caroline shared her adoption plans with Melanie. +❌ WRONG: NOTHING_TO_REMEMBER + +ALWAYS include opening <output> and closing </output> tags around your entire response. 
`; const userPrompt = ` @@ -238,11 +244,11 @@ ${context.source} -${context.episodeTimestamp || 'Not provided'} +${context.episodeTimestamp || "Not provided"} -${context.sessionContext || 'No previous episodes in this session'} +${context.sessionContext || "No previous episodes in this session"} diff --git a/apps/webapp/app/services/prompts/statements.ts b/apps/webapp/app/services/prompts/statements.ts index 38c5f07..25f44ec 100644 --- a/apps/webapp/app/services/prompts/statements.ts +++ b/apps/webapp/app/services/prompts/statements.ts @@ -70,6 +70,9 @@ EXTRACT NEW MEANINGFUL RELATIONSHIPS: * Product-organization relationships (e.g., "Software" "developed by" "Company") * Technical dependencies and usage (e.g., "Application" "uses" "Database") * Hierarchical relationships (e.g., "Manager" "supervises" "Employee") + * Duration relationships (e.g., "Caroline" "has known" "friends" [duration: "4 years"]) + * Temporal sequence relationships (e.g., "Caroline" "met" "friends" [context: "since moving"]) + * Contextual support relationships (e.g., "friends" "supported" "Caroline" [context: "during breakup"]) ## SAME-NAME ENTITY RELATIONSHIP FORMATION When entities share identical names but have different types, CREATE explicit relationship statements: @@ -80,6 +83,19 @@ When entities share identical names but have different types, CREATE explicit re - **MANDATORY**: Always create at least one relationship statement for same-name entities - **CONTEXT-DRIVEN**: Choose predicates that accurately reflect the most likely relationship based on available context +## DURATION AND TEMPORAL CONTEXT ENTITY USAGE +When Duration or TemporalContext entities are available in AVAILABLE ENTITIES: +- **Duration entities** (e.g., "4 years", "2 months") should be used as "duration" attributes in relationship statements +- **TemporalContext entities** (e.g., "since moving", "after breakup") should be used as "context" attributes +- **DO NOT** use Duration/TemporalContext entities as 
direct subjects or objects in relationships +- **DO USE** them to enrich relationship statements with temporal information + +EXAMPLES of correct Duration/TemporalContext usage: +- If AVAILABLE ENTITIES contains ["Caroline", "friends", "4 years", "since moving"]: + * CREATE: "Caroline" "has known" "friends" [attributes: {"duration": "4 years", "context": "since moving"}] + * DO NOT CREATE: "Caroline" "relates to" "4 years" (Duration as object) + * DO NOT CREATE: "since moving" "describes" "friendship" (TemporalContext as subject) + ## PREVIOUS EPISODE FILTERING Before creating any relationship statement: - **CHECK**: Review previous episodes to see if this exact relationship already exists @@ -87,11 +103,24 @@ Before creating any relationship statement: - **ENHANCE**: Only create statements if they add new information or represent updates - **FOCUS**: Prioritize completely new connections not represented in the knowledge graph -ABOUT TEMPORAL INFORMATION: -- For events with dates/times, DO NOT create a separate statement with the event as both source and target. -- Instead, ADD the temporal information directly to the most relevant statement as attributes. -- Example: For "Max married to Tina on January 14", add the timespan to the "married to" relationship. -- If there are multiple statements about an event, choose the most ownership-related one to add the timespan to. +CRITICAL TEMPORAL INFORMATION HANDLING: +- For events with specific dates/times, ALWAYS capture temporal information in statement attributes +- Use the "event_date" attribute to specify when the fact/event actually occurred (not when it was mentioned) +- Use the "temporal_context" attribute for temporal descriptions like "last week", "recently", etc. 
+- MANDATORY: Use the REFERENCE_TIME to resolve relative temporal expressions to absolute ISO dates +- Calculate event_date by using REFERENCE_TIME as the anchor point for relative time calculations +- Example: For "Max married to Tina on January 14", add: + - "event_date": "January 14" (or fully resolved date if available) + - "temporal_context": "specific date mentioned" +- For recent events: "went camping last week" → add: + - "event_date": "[resolved ISO date ~7 days before episode date, e.g., '2023-06-20T00:00:00.000Z']" + - "temporal_context": "last week" +- For past events: "read book last year" → add: + - "event_date": "[resolved ISO date ~1 year before episode date, e.g., '2022-06-27T00:00:00.000Z']" + - "temporal_context": "last year" +- For future events: "going to Paris next month" → add: + - "event_date": "[resolved ISO date ~1 month after episode date, e.g., '2023-07-27T00:00:00.000Z']" + - "temporal_context": "next month" Format your response as a JSON object with the following structure: @@ -105,8 +134,12 @@ Format your response as a JSON object with the following structure: "targetType": "[Target Entity Type]", "fact": "[Natural language representation of the fact]", "attributes": { - "confidence": confidence of the fact + "confidence": confidence of the fact, "source": "explicit or implicit source type", + "event_date": "ISO date when the fact/event actually occurred (if applicable)", + "temporal_context": "original temporal description (e.g., 'last week', 'recently')", + "duration": "duration information from Duration entities (e.g., '4 years', '2 months')", + "context": "contextual information from TemporalContext entities (e.g., 'since moving', 'after breakup')" } } ] @@ -129,11 +162,18 @@ If AVAILABLE ENTITIES contains ["John", "Max", "Wedding", "John (Company)"], you - "Max" "married to" "Tina" with timespan attribute ✓ (if new relationship) - "John" "founded" "John (Company)" ✓ (PRIORITY: same name, different types) +Example of CORRECT 
Duration/TemporalContext usage: +If AVAILABLE ENTITIES contains ["Caroline", "friends", "4 years", "since moving", "breakup"]: +- "Caroline" "has known" "friends" [attributes: {"duration": "4 years", "context": "since moving"}] ✓ +- "friends" "supported" "Caroline" [attributes: {"context": "during breakup"}] ✓ +- "Caroline" "met" "friends" [attributes: {"context": "since moving"}] ✓ + Example of INCORRECT usage: - "John" "attends" "Party" ✗ (if "Party" is not in AVAILABLE ENTITIES) - "Marriage" "occurs on" "Marriage" ✗ (NEVER create self-loops) - "John" "attends" "Wedding" ✗ (if already captured in previous episodes) -- "January 14" "is" "Marriage date" ✗ (if "January 14" or "Marriage date" is not in AVAILABLE ENTITIES)`, +- "Caroline" "relates to" "4 years" ✗ (Duration entity used as direct object) +- "since moving" "describes" "friendship" ✗ (TemporalContext entity used as direct subject)`, }, { role: "user", @@ -171,9 +211,12 @@ export const resolveStatementPrompt = ( return [ { role: "system", - content: `You are a knowledge graph expert that analyzes statements to detect duplications and contradictions. -You analyze multiple new statements against existing statements to determine whether the new statement duplicates any existing statement or contradicts any existing statement. -Pay special attention to temporal aspects, event updates, and context changes. If an event changes (like a date shift), statements about the original event are likely contradicted by statements about the updated event. + content: `You are a knowledge graph expert that analyzes statements to detect duplications and TRUE contradictions. +You analyze multiple new statements against existing statements to determine whether the new statement duplicates any existing statement or ACTUALLY contradicts any existing statement. 
+ +CRITICAL: Distinguish between CONTRADICTIONS vs PROGRESSIONS: +- CONTRADICTIONS: Statements that CANNOT both be true (mutually exclusive facts) +- PROGRESSIONS: Sequential states or developments that CAN both be true (e.g., planning → execution, researching → deciding) I need to analyze whether a new statement duplicates or contradicts existing statements in a knowledge graph. @@ -185,32 +228,60 @@ Follow these instructions carefully: - Two statements are duplicates if they express the same meaning even with different wording - Consider entity resolution has already been done, so different entity names are NOT an issue -2. Determine if the new statement contradicts any existing valid statements - - Contradictions occur when statements cannot both be true at the same time - - Pay special attention to negations, opposites, and mutually exclusive facts - - Consider temporal validity - statements may only be contradictions within specific time periods - -3. IMPORTANT: For events that change (like rescheduled appointments, moved dates, changed locations): - - When an event changes date/time/location, new statements about the updated event likely contradict statements about the original event - - Look for contextual clues about event changes, cancellations, or rescheduling - - Example: If "Concert on June 10" moved to "Concert on June 12", then "John attends June 10 concert" contradicts "John doesn't attend June 12 concert" +2. Determine if the new statement ACTUALLY contradicts any existing valid statements + - TRUE CONTRADICTIONS: Statements that cannot both be true simultaneously + - Pay attention to direct negations, opposites, and mutually exclusive facts + - Consider temporal context - statements may be contradictory only within specific time periods + +3. 
CRITICAL DISTINCTION - What are NOT contradictions: + - PROGRESSIONS: "researching X" → "decided on X" (both can be true - research led to decision) + - TEMPORAL SEQUENCES: "planning camping" → "went camping" (both can be true - plan was executed) + - STATE CHANGES: "single" → "married" (both can be true at different times) + - LEARNING/GROWTH: "studying topic X" → "expert in topic X" (both can be true - progression) + +4. SPECIFIC EXAMPLES: + +TRUE CONTRADICTIONS (mark as contradictions): + - "John lives in New York" vs "John lives in San Francisco" (same time period, can't be both) + - "Meeting at 3pm" vs "Meeting at 5pm" (same meeting, conflicting times) + - "Project completed" vs "Project cancelled" (mutually exclusive outcomes) + - "Caroline is single" vs "Caroline is married" (same time period, opposite states) + +NOT CONTRADICTIONS (do NOT mark as contradictions): + - "Caroline researching adoption agencies" vs "Caroline finalized adoption agency" (research → decision progression) + - "Caroline planning camping next week" vs "Caroline went camping" (planning → execution progression) + - "User studying Python" vs "User completed Python course" (learning progression) + - "Meeting scheduled for 3pm" vs "Meeting was held at 3pm" (planning → execution) + - "Considering job offers" vs "Accepted job offer" (consideration → decision) + +5. MANDATORY OUTPUT FORMAT: + +You MUST wrap your response in tags. Do not include any text outside these tags. - 4. 
Format your response as a JSON object with the following structure: [{ "statementId": "new_statement_uuid", - "isDuplicate": true/false, - "duplicateId": "existing_statement_uuid-if-duplicate-exists", - "contradictions": ["existing_statement_uuid-1", "existing_statement_uuid-2"], // UUIDs of any contradicted statements - }] + "isDuplicate": false, + "duplicateId": null, + "contradictions": [] + }, + { + "statementId": "another_statement_uuid", + "isDuplicate": true, + "duplicateId": "existing_duplicate_uuid", + "contradictions": ["contradicted_statement_uuid"] + }] - - Important guidelines: + +CRITICAL FORMATTING RULES: +- ALWAYS use <output> and </output> tags +- Include NO text before <output> or after </output> +- Return valid JSON array with all statement IDs from NEW_STATEMENTS - If the new statement is a duplicate, include the UUID of the duplicate statement -- For contradictions, list all statement UUIDs that the new statement contradicts -- If a statement is both a contradiction AND a duplicate (rare case), mark it as a duplicate -- Identify temporal and contextual shifts that may create implicit contradictions -- Don't give any reason, just give the final output. 
+- For TRUE contradictions only, list statement UUIDs that the new statement contradicts +- If a statement is both a contradiction AND a duplicate (rare case), mark it as a duplicate +- DO NOT mark progressions, temporal sequences, or state developments as contradictions +- ONLY mark genuine mutually exclusive facts as contradictions `, }, { diff --git a/apps/webapp/app/services/search.server.ts b/apps/webapp/app/services/search.server.ts index 286f09c..a5bee95 100644 --- a/apps/webapp/app/services/search.server.ts +++ b/apps/webapp/app/services/search.server.ts @@ -5,6 +5,7 @@ import { applyMultiFactorReranking, applyMultiFactorMMRReranking, applyWeightedRRF, + applyCohereReranking, } from "./search/rerank"; import { getEpisodesByStatements, @@ -15,6 +16,8 @@ import { import { getEmbedding } from "~/lib/model.server"; import { prisma } from "~/db.server"; import { runQuery } from "~/lib/neo4j.server"; +import { env } from "~/env.server"; +import { getEpisodeStatements } from "./graphModels/episode"; /** * SearchService provides methods to search the reified + temporal knowledge graph @@ -36,7 +39,7 @@ export class SearchService { query: string, userId: string, options: SearchOptions = {}, - ): Promise<{ episodes: string[]; facts: string[] }> { + ): Promise<{ episodes: string[]; facts: string[]; relatedFacts: string[] }> { const startTime = Date.now(); // Default options @@ -49,7 +52,7 @@ export class SearchService { includeInvalidated: options.includeInvalidated || false, entityTypes: options.entityTypes || [], predicateTypes: options.predicateTypes || [], - scoreThreshold: options.scoreThreshold || 0.7, + scoreThreshold: options.scoreThreshold || 0.3, minResults: options.minResults || 10, spaceIds: options.spaceIds || [], }; @@ -81,6 +84,17 @@ export class SearchService { // 3. 
Return top results const episodes = await getEpisodesByStatements(filteredResults); + const relatedFacts: StatementNode[] = []; + await Promise.all( + episodes.map((episode) => { + return getEpisodeStatements({ episodeUuid: episode.uuid, userId }).then( + (facts) => { + relatedFacts.push(...facts); + }, + ); + }), + ); + // Log recall asynchronously (don't await to avoid blocking response) const responseTime = Date.now() - startTime; this.logRecallAsync( @@ -93,11 +107,12 @@ export class SearchService { logger.error("Failed to log recall event:", error); }); - this.updateRecallCount(userId, episodes, filteredResults); + // this.updateRecallCount(userId, episodes, filteredResults); return { episodes: episodes.map((episode) => episode.content), facts: filteredResults.map((statement) => statement.fact), + relatedFacts: relatedFacts.map((fact) => fact.fact), }; } @@ -111,6 +126,10 @@ export class SearchService { ): StatementNode[] { if (results.length === 0) return []; + if (results.length <= 5) { + return results; + } + let isRRF = false; // Extract scores from results const scoredResults = results.map((result) => { @@ -131,6 +150,8 @@ export class SearchService { score = (result as any).combinedScore; } else if ((result as any).mmrScore !== undefined) { score = (result as any).mmrScore; + } else if ((result as any).cohereScore !== undefined) { + score = (result as any).cohereScore; } return { result, score }; @@ -229,6 +250,11 @@ export class SearchService { results.bfs.length > 0, ].filter(Boolean).length; + if (env.COHERE_API_KEY) { + logger.info("Using Cohere reranking"); + return applyCohereReranking(query, results, options); + } + // If results are coming from only one source, use cross-encoder reranking if (nonEmptySources <= 1) { logger.info( diff --git a/apps/webapp/app/services/search/rerank.ts b/apps/webapp/app/services/search/rerank.ts index 56e24c5..cebe560 100644 --- a/apps/webapp/app/services/search/rerank.ts +++ 
b/apps/webapp/app/services/search/rerank.ts @@ -3,6 +3,7 @@ import { combineAndDeduplicateStatements } from "./utils"; import { type CoreMessage } from "ai"; import { makeModelCall } from "~/lib/model.server"; import { logger } from "../logger.service"; +import { CohereClientV2 } from "cohere-ai"; // Utility function to safely convert BigInt values to Number function safeNumber(value: any): number { @@ -439,3 +440,96 @@ export function applyMultiFactorReranking(results: { return sortedResults; } + +/** + * Apply Cohere Rerank 3.5 to search results for improved question-to-fact matching + * This is particularly effective for bridging the semantic gap between questions and factual statements + */ +export async function applyCohereReranking( + query: string, + results: { + bm25: StatementNode[]; + vector: StatementNode[]; + bfs: StatementNode[]; + }, + options?: { + limit?: number; + model?: string; + }, +): Promise { + const { model = "rerank-v3.5" } = options || {}; + const limit = 100; + + try { + const startTime = Date.now(); + // Combine and deduplicate all results + const allResults = [ + ...results.bm25.slice(0, 100), + ...results.vector.slice(0, 100), + ...results.bfs.slice(0, 100), + ]; + const uniqueResults = combineAndDeduplicateStatements(allResults); + console.log("Unique results:", uniqueResults.length); + + if (uniqueResults.length === 0) { + logger.info("No results to rerank with Cohere"); + return []; + } + + // Check for API key + const apiKey = process.env.COHERE_API_KEY; + if (!apiKey) { + logger.warn("COHERE_API_KEY not found, falling back to original results"); + return uniqueResults.slice(0, limit); + } + + // Initialize Cohere client + const cohere = new CohereClientV2({ + token: apiKey, + }); + + // Prepare documents for Cohere API + const documents = uniqueResults.map((statement) => statement.fact); + + logger.info( + `Cohere reranking ${documents.length} statements with model ${model}`, + ); + + // Call Cohere Rerank API + const response = 
await cohere.rerank({ + query, + documents, + model, + topN: Math.min(limit, documents.length), + }); + + console.log("Cohere reranking billed units:", response.meta?.billedUnits); + + // Map results back to StatementNodes with Cohere scores + const rerankedResults = response.results + .map((result, index) => ({ + ...uniqueResults[result.index], + cohereScore: result.relevanceScore, + cohereRank: index + 1, + })) + .filter((result) => result.cohereScore > 0.3); + + const responseTime = Date.now() - startTime; + console.log( + rerankedResults.map((result) => `${result.fact} - ${result.cohereScore}`), + ); + logger.info( + `Cohere reranking completed: ${rerankedResults.length} results returned in ${responseTime}ms`, + ); + + return rerankedResults; + } catch (error) { + logger.error("Cohere reranking failed:", { error }); + + // Graceful fallback to original results + const allResults = [...results.bm25, ...results.vector, ...results.bfs]; + const uniqueResults = combineAndDeduplicateStatements(allResults); + + return uniqueResults.slice(0, limit); + } +} diff --git a/apps/webapp/app/trigger/ingest/ingest.ts b/apps/webapp/app/trigger/ingest/ingest.ts index 79d635c..5460252 100644 --- a/apps/webapp/app/trigger/ingest/ingest.ts +++ b/apps/webapp/app/trigger/ingest/ingest.ts @@ -70,12 +70,12 @@ export const ingestTask = task({ episodeId: episodeDetails?.episodeUuid, }); if (episodeDetails.episodeUuid) { - await triggerSpaceAssignment({ - userId: payload.userId, - workspaceId: payload.workspaceId, - mode: "episode", - episodeId: episodeDetails.episodeUuid, - }); + // await triggerSpaceAssignment({ + // userId: payload.userId, + // workspaceId: payload.workspaceId, + // mode: "episode", + // episodeId: episodeDetails.episodeUuid, + // }); } } catch (assignmentError) { // Don't fail the ingestion if assignment fails diff --git a/apps/webapp/package.json b/apps/webapp/package.json index 34223f5..148b846 100644 --- a/apps/webapp/package.json +++ b/apps/webapp/package.json @@ 
-86,6 +86,7 @@ "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "cmdk": "^0.2.1", + "cohere-ai": "^7.18.1", "compression": "^1.7.4", "cross-env": "^7.0.3", "d3": "^7.9.0", diff --git a/benchmarks/evaluate_qa.js b/benchmarks/evaluate_qa.js index fbba684..916fc53 100755 --- a/benchmarks/evaluate_qa.js +++ b/benchmarks/evaluate_qa.js @@ -3,6 +3,7 @@ const fs = require("fs"); const path = require("path"); const axios = require("axios"); + /** * LOCOMO Q&A Evaluation Script * Evaluates question answering against ingested LOCOMO conversations @@ -13,7 +14,7 @@ class LocomoEvaluator { constructor(baseUrl = "http://localhost:3033") { this.baseUrl = baseUrl; this.headers = { - Authorization: "Bearer rc_pat_92bdumc45dwwmfxrr4xy2bk96pstt1j7opj6t412", + Authorization: "Bearer rc_pat_kbc76ykt3gd81r6ctyeh8as5jryihbeqqvnsi2wt", }; this.results = []; @@ -21,7 +22,6 @@ class LocomoEvaluator { this.axios = axios.create({ baseURL: this.baseUrl, headers: this.headers, - timeout: 10000, }); } @@ -47,7 +47,6 @@ class LocomoEvaluator { try { const response = await this.makeRequest("/api/v1/search", { query: question, - limit: 10, }); return response; @@ -57,51 +56,81 @@ class LocomoEvaluator { } } + async answerQuestion(question) { + try { + const response = await this.makeRequest("/api/v1/qa", { + question: question, + }); + + return response; + } catch (error) { + console.error("Q&A API error:", error.message); + return { + question: question, + generated_answer: "Error: Could not generate answer", + }; + } + } + + async evaluateAnswer(question, standardAnswer, generatedAnswer) { + const response = await this.makeRequest("/api/v1/evaluate", { + question, + standard_answer: standardAnswer, + generated_answer: generatedAnswer, + }); + + return { + label: response.label, + reasoning: response.reasoning, + matchRatio: response.matchRatio, + evaluationMethod: response.method, + }; + } + async evaluateQuestion(question, expectedAnswer, evidence, conversationId, category) { - // Search 
for relevant context - const searchResults = await this.searchMemory(question, conversationId); + // NEW: Get generated answer from Q&A API + const qaResponse = await this.answerQuestion(question); + const generatedAnswer = qaResponse.generated_answer || ""; - // Handle different API response formats - const episodes = searchResults.episodes || searchResults.results || []; - - // Extract relevant context - const context = episodes.map((episode) => { - if (typeof episode === 'string') { - return episode; - } - return episode.content || episode.text || episode; - }).join("\n"); - - // Basic relevance scoring - const hasContext = episodes.length > 0; - const contextLength = context.length; - - // Check if expected answer appears in context (simple matching) - const answerInContext = context.toLowerCase().includes(expectedAnswer.toString().toLowerCase()); + // NEW: Evaluate the generated answer against the expected answer + const evaluation = await this.evaluateAnswer(question, expectedAnswer, generatedAnswer); return { question, expectedAnswer, evidence, category, - searchContext: context, - searchResultsCount: episodes.length, - hasContext, - contextLength, - answerInContext, conversationId, - facts: searchResults.facts || [], + generatedAnswer: generatedAnswer, + evaluationResult: evaluation.label, + evaluationReasoning: evaluation.reasoning, + matchRatio: evaluation.matchRatio, + evaluationMethod: evaluation.evaluationMethod, }; } async evaluateConversation(conversation, conversationId) { console.log(`Evaluating conversation ${conversationId}...`); + const batchSize = 15; // Process 15 questions concurrently const qaResults = []; const totalQuestions = conversation.qa.length; + let processed = 0; - for (const [index, qa] of conversation.qa.entries()) { - if (index === 0) { + console.log(`Processing ${totalQuestions} questions in batches of ${batchSize}...`); + + for (let i = 0; i < totalQuestions; i += batchSize) { + const batch = conversation.qa.slice(i, i + 
batchSize); + const batchStartIndex = i; + + console.log( + `Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(totalQuestions / batchSize)} (questions ${i + 1}-${Math.min(i + batchSize, totalQuestions)})` + ); + + // Create promises for the current batch + const batchPromises = batch.map(async (qa, batchIndex) => { + const questionIndex = batchStartIndex + batchIndex; + console.log(qa.question); try { const result = await this.evaluateQuestion( qa.question, @@ -110,22 +139,54 @@ class LocomoEvaluator { conversationId, qa.category ); - - qaResults.push(result); - - // Progress indicator - if ((index + 1) % 25 === 0) { - console.log(` Evaluated ${index + 1}/${totalQuestions} questions`); - } - - // Small delay to avoid overwhelming the system - await new Promise((resolve) => setTimeout(resolve, 25)); + return { result, index: questionIndex }; } catch (error) { - console.error(`Error evaluating question ${index}:`, error.message); + console.error(`Error evaluating question ${questionIndex + 1}:`, error.message); + return { error: error.message, index: questionIndex, qa }; } - } + }); + + // Process batch concurrently + const batchResults = await Promise.allSettled(batchPromises); + + // Process results from this batch + batchResults.forEach((promiseResult) => { + if (promiseResult.status === "fulfilled") { + const { result, error, index, qa } = promiseResult.value; + if (result) { + qaResults.push(result); + } else if (error) { + // Add a placeholder result for failed evaluations + qaResults.push({ + question: qa.question, + expectedAnswer: qa.answer ? 
qa.answer.toString() : qa.adversarial_answer.toString(), + evidence: qa.evidence, + category: qa.category, + conversationId, + error: error, + generatedAnswer: "Error: Evaluation failed", + evaluationResult: "ERROR", + evaluationReasoning: `Evaluation failed: ${error}`, + matchRatio: 0, + evaluationMethod: "error", + }); + } + } else { + console.error(`Batch promise rejected:`, promiseResult.reason); + } + }); + + processed += batch.length; + console.log(` Completed ${processed}/${totalQuestions} questions`); + + // Save results periodically (every batch or ~15 questions) + console.log(`Saving intermediate results...`); + this.saveResults(); + + // break; } + console.log(`Completed evaluation of ${totalQuestions} questions`); return qaResults; } @@ -133,7 +194,7 @@ class LocomoEvaluator { console.log("Starting LOCOMO Q&A evaluation..."); // Load LOCOMO dataset - const dataPath = path.join(__dirname, "data", "locomo10.json"); + const dataPath = path.join(__dirname, "locomo10.json"); const conversations = JSON.parse(fs.readFileSync(dataPath, "utf8")); console.log(`Loaded ${conversations.length} conversations for evaluation`); @@ -187,17 +248,57 @@ class LocomoEvaluator { 0 ); + // NEW: Q&A evaluation statistics + const questionsWithGeneratedAnswers = this.results.reduce( + (sum, conv) => + sum + + conv.results.filter( + (r) => r.generatedAnswer && r.generatedAnswer !== "Error: Could not generate answer" + ).length, + 0 + ); + const correctAnswers = this.results.reduce( + (sum, conv) => sum + conv.results.filter((r) => r.evaluationResult === "CORRECT").length, + 0 + ); + const wrongAnswers = this.results.reduce( + (sum, conv) => sum + conv.results.filter((r) => r.evaluationResult === "WRONG").length, + 0 + ); + const errorAnswers = this.results.reduce( + (sum, conv) => sum + conv.results.filter((r) => r.evaluationResult === "ERROR").length, + 0 + ); + // Category breakdown const categoryStats = {}; this.results.forEach((conv) => { conv.results.forEach((result) => { 
const cat = result.category || "unknown"; if (!categoryStats[cat]) { - categoryStats[cat] = { total: 0, withContext: 0, withAnswer: 0 }; + categoryStats[cat] = { + total: 0, + withContext: 0, + withAnswer: 0, + withGenerated: 0, + correct: 0, + wrong: 0, + errors: 0, + }; } categoryStats[cat].total++; if (result.hasContext) categoryStats[cat].withContext++; if (result.answerInContext) categoryStats[cat].withAnswer++; + if ( + result.generatedAnswer && + result.generatedAnswer !== "Error: Could not generate answer" && + result.generatedAnswer !== "Error: Evaluation failed" + ) { + categoryStats[cat].withGenerated++; + } + if (result.evaluationResult === "CORRECT") categoryStats[cat].correct++; + if (result.evaluationResult === "WRONG") categoryStats[cat].wrong++; + if (result.evaluationResult === "ERROR") categoryStats[cat].errors++; }); }); @@ -207,6 +308,19 @@ class LocomoEvaluator { questionsWithAnswerInContext, contextRetrievalRate: ((questionsWithContext / totalQuestions) * 100).toFixed(1), answerFoundRate: ((questionsWithAnswerInContext / totalQuestions) * 100).toFixed(1), + // NEW: Q&A evaluation metrics + questionsWithGeneratedAnswers, + correctAnswers, + wrongAnswers, + errorAnswers, + qaSuccessRate: + totalQuestions > 0 + ? ((questionsWithGeneratedAnswers / totalQuestions) * 100).toFixed(1) + : "0.0", + answerAccuracyRate: + questionsWithGeneratedAnswers > 0 + ? 
((correctAnswers / questionsWithGeneratedAnswers) * 100).toFixed(1) + : "0.0", categoryBreakdown: categoryStats, }; } @@ -224,11 +338,42 @@ class LocomoEvaluator { `Questions with answer in context: ${stats.questionsWithAnswerInContext}/${stats.totalQuestions} (${stats.answerFoundRate}%)` ); + console.log("\n=== Q&A EVALUATION RESULTS ==="); + console.log( + `Questions with generated answers: ${stats.questionsWithGeneratedAnswers}/${stats.totalQuestions} (${stats.qaSuccessRate}%)` + ); + console.log( + `Correct answers: ${stats.correctAnswers}/${stats.questionsWithGeneratedAnswers} (${stats.answerAccuracyRate}%)` + ); + console.log(`Wrong answers: ${stats.wrongAnswers}/${stats.questionsWithGeneratedAnswers}`); + if (stats.errorAnswers > 0) { + console.log(`Evaluation errors: ${stats.errorAnswers}/${stats.totalQuestions}`); + } + console.log("\n=== CATEGORY BREAKDOWN ==="); - Object.entries(stats.categoryBreakdown).forEach(([category, stats]) => { + Object.entries(stats.categoryBreakdown).forEach(([category, catStats]) => { + const retrievalRate = ((catStats.withAnswer / catStats.total) * 100).toFixed(1); + const qaRate = + catStats.withGenerated > 0 + ? ((catStats.withGenerated / catStats.total) * 100).toFixed(1) + : "0.0"; + const accuracyRate = + catStats.withGenerated > 0 + ? 
((catStats.correct / catStats.withGenerated) * 100).toFixed(1) + : "0.0"; + + console.log(`Category ${category}:`); + console.log(` Total questions: ${catStats.total}`); console.log( - `Category ${category}: ${stats.withAnswer}/${stats.total} (${((stats.withAnswer / stats.total) * 100).toFixed(1)}%) answers found` + ` Context retrieval: ${catStats.withAnswer}/${catStats.total} (${retrievalRate}%)` ); + console.log(` Generated answers: ${catStats.withGenerated}/${catStats.total} (${qaRate}%)`); + console.log( + ` Answer accuracy: ${catStats.correct}/${catStats.withGenerated} (${accuracyRate}%)` + ); + if (catStats.errors > 0) { + console.log(` Evaluation errors: ${catStats.errors}/${catStats.total}`); + } }); console.log("\n=== PERFORMANCE INSIGHTS ==="); @@ -239,8 +384,33 @@ class LocomoEvaluator { ) / stats.totalQuestions; console.log(`Average context length: ${avgContextLength.toFixed(0)} characters`); - console.log("\nNote: This evaluation measures retrieval performance. For accuracy scoring,"); - console.log("consider implementing LLM-based answer generation and comparison."); + const avgMatchRatio = + this.results.reduce( + (sum, conv) => sum + conv.results.reduce((s, r) => s + (r.matchRatio || 0), 0), + 0 + ) / stats.totalQuestions; + console.log(`Average answer match ratio: ${avgMatchRatio.toFixed(3)}`); + + // Show evaluation method breakdown + const evaluationMethods = {}; + this.results.forEach((conv) => { + conv.results.forEach((result) => { + const method = result.evaluationMethod || "unknown"; + evaluationMethods[method] = (evaluationMethods[method] || 0) + 1; + }); + }); + + console.log("\n=== EVALUATION SUMMARY ==="); + console.log( + "This evaluation measures both retrieval performance and answer generation accuracy." 
+ ); + console.log("Generated answers are evaluated against gold standard answers."); + + console.log("\n=== EVALUATION METHODS USED ==="); + Object.entries(evaluationMethods).forEach(([method, count]) => { + const percentage = ((count / stats.totalQuestions) * 100).toFixed(1); + console.log(`${method}: ${count}/${stats.totalQuestions} (${percentage}%)`); + }); } } diff --git a/benchmarks/evaluation_results.json b/benchmarks/evaluation_results.json index 9052ee2..71c1057 100644 --- a/benchmarks/evaluation_results.json +++ b/benchmarks/evaluation_results.json @@ -1,16 +1,62 @@ { - "timestamp": "2025-08-11T15:08:14.955Z", + "timestamp": "2025-08-19T11:35:51.144Z", "summary": { "totalQuestions": 199, - "questionsWithContext": 1, + "questionsWithContext": 0, "questionsWithAnswerInContext": 0, - "contextRetrievalRate": "0.5", + "contextRetrievalRate": "0.0", "answerFoundRate": "0.0", + "questionsWithGeneratedAnswers": 199, + "correctAnswers": 116, + "wrongAnswers": 38, + "errorAnswers": 45, + "qaSuccessRate": "100.0", + "answerAccuracyRate": "58.3", "categoryBreakdown": { + "1": { + "total": 32, + "withContext": 0, + "withAnswer": 0, + "withGenerated": 32, + "correct": 21, + "wrong": 11, + "errors": 0 + }, "2": { - "total": 1, - "withContext": 1, - "withAnswer": 0 + "total": 37, + "withContext": 0, + "withAnswer": 0, + "withGenerated": 37, + "correct": 29, + "wrong": 8, + "errors": 0 + }, + "3": { + "total": 13, + "withContext": 0, + "withAnswer": 0, + "withGenerated": 13, + "correct": 9, + "wrong": 4, + "errors": 0 + }, + "4": { + "total": 70, + "withContext": 0, + "withAnswer": 0, + "withGenerated": 70, + "correct": 55, + "wrong": 15, + "errors": 0 + }, + "5": { + "total": 47, + "withContext": 0, + "withAnswer": 0, + "withGenerated": 2, + "correct": 2, + "wrong": 0, + "errors": 45 } } }, @@ -25,17 +71,2833 @@ "D1:3" ], "category": 2, - "searchContext": "Caroline attended an LGBTQ support group, where she was inspired by transgender stories and felt accepted, which 
encouraged her to embrace herself. Caroline plans to continue her education and is interested in exploring career options in counseling or mental health. Melanie expressed appreciation for Caroline's empathy and shared that she painted a lake sunrise last year as a form of self-expression. Caroline complimented Melanie's painting, and both agreed that painting is a valuable outlet for relaxation and self-expression. Caroline intended to do research after the conversation, while Melanie planned to go swimming with her children. Both Caroline and Melanie emphasized the importance of self-care.\nCaroline informed Melanie on 20 July 2023 that she recently joined a new LGBTQ activist group called \"Connected LGBTQ Activists\" and is enjoying making a difference. Melanie expressed happiness for Caroline and showed interest in learning more about the group. Caroline explained that the group focuses on positive change and mutual support. Caroline participated in a recent pride parade in their city, which she described as a powerful reminder of the fight for equality. Melanie shared that she recently went to the beach with her children, which they enjoyed, and mentioned that her family's summer highlight is a camping trip where they witnessed the Perseid meteor shower, an experience that made her feel awe for the universe and appreciate life. Melanie also shared a special memory of her youngest child taking her first steps, which Caroline found sweet and reflective of the special bonds in families. Melanie expressed gratitude for her family, and Caroline praised her for having an awesome family.\nOn August 14, 2023, Melanie and Caroline discussed their recent experiences, with Melanie sharing her enjoyment of a concert for her daughter's birthday and Caroline describing her attendance at an advocacy event focused on love and support. 
Melanie inquired about Caroline's pride parade experience, leading Caroline to express pride in being part of the LGBTQ community and her commitment to fighting for equality. Melanie emphasized the importance of creating a loving and inclusive environment for their children. Caroline shared that she incorporates inclusivity and diversity into her artwork to advocate for LGBTQ+ acceptance, and that her art expresses her trans experience and aims to foster understanding of the trans community. Caroline shared a painting titled \"Embracing Identity,\" which represents self-acceptance and love, and explained that art has aided her self-discovery and acceptance. Melanie acknowledged the healing power of art and praised Caroline's work. Both Melanie and Caroline invited each other to reach out anytime, reinforcing a supportive relationship.", - "searchResultsCount": 3, - "hasContext": true, - "contextLength": 2810, - "answerInContext": false, "conversationId": "locomo_1", - "facts": [ - "The support group is associated with LGBTQ.", - "Caroline joined Connected LGBTQ Activists.", - "Caroline is a member of the LGBTQ community." 
- ] + "generatedAnswer": "Caroline attended the LGBTQ support group on May 7th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer provides the exact same date as the gold answer, just in a slightly different format.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie paint a sunrise?", + "expectedAnswer": 2022, + "evidence": [ + "D1:12" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie painted a sunrise in the previous year before May 8th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer states that Melanie painted a sunrise in the year before May 8th, 2023, which would be 2022, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What fields would Caroline be likely to pursue in her educaton?", + "expectedAnswer": "Psychology, counseling certification", + "evidence": [ + "D1:9", + "D1:11" + ], + "category": 3, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the provided context, Caroline would be likely to pursue fields related to counseling and mental health in her education.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer mentions counseling and mental health, which aligns with the gold answer's focus on psychology and counseling certification.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.3333333333333333, + "evaluationMethod": "llm" + }, + { + "question": "What did Caroline research?", + "expectedAnswer": "Adoption agencies", + "evidence": [ + "D2:8" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline researched counseling or mental health work.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer mentions counseling or mental health work, which is different from the gold answer of adoption agencies, so 
it does not address the same topic.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What is Caroline's identity?", + "expectedAnswer": "Transgender woman", + "evidence": [ + "D1:5" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline identifies as a transgender woman and an artist.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly states that Caroline identifies as a transgender woman, matching the gold answer, and the additional detail about being an artist does not detract from the correctness.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie run a charity race?", + "expectedAnswer": "The sunday before 25 May 2023", + "evidence": [ + "D2:1" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie ran a charity race on May 20th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer specifies May 20th, 2023, which is the Sunday before May 25, 2023, matching the gold answer's time frame.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.4, + "evaluationMethod": "llm" + }, + { + "question": "When is Melanie planning on going camping?", + "expectedAnswer": "June 2023", + "evidence": [ + "D2:7" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide information about when Melanie is planning on going camping.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer incorrectly states that there is no information about when Melanie is planning on going camping, while the gold answer specifies \"June 2023\".\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What is Caroline's relationship status?", + "expectedAnswer": "Single", + "evidence": [ + "D3:13", + "D2:14" + ], + "category": 1, + 
"conversationId": "locomo_1", + "generatedAnswer": "The context does not provide specific information about Caroline's romantic relationship status. It only describes her friendships, community involvement, and her mutually supportive relationship with Melanie, but does not clarify whether she is single, in a romantic relationship, or any other specific relationship status.", + "evaluationResult": "WRONG", + "evaluationReasoning": "{\"label\": \"WRONG\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline give a speech at a school?", + "expectedAnswer": "The week before 9 June 2023", + "evidence": [ + "D3:1" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline gave a speech at a school during the week prior to June 9th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately matches the time frame given in the gold answer, referring to the week before June 9, 2023.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.8, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline meet up with her friends, family, and mentors?", + "expectedAnswer": "The week before 9 June 2023", + "evidence": [ + "D3:11" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the provided context, Caroline attended a picnic with her support network—which includes friends and family—during the week of June 26th to July 2nd, 2023. There is no specific information in the context about a meeting with her mentors at that time.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer gives a different date range (week of June 26th to July 2nd, 2023) than the gold answer (the week before 9 June 2023), so it does not match the required time period. 
\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0.8, + "evaluationMethod": "llm" + }, + { + "question": "How long has Caroline had her current group of friends for?", + "expectedAnswer": "4 years", + "evidence": [ + "D3:13" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline has had her current group of friends for four years.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer matches the gold answer in both content and time frame, stating that Caroline has had her current group of friends for four years.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "Where did Caroline move from 4 years ago?", + "expectedAnswer": "Sweden", + "evidence": [ + "D3:13", + "D4:3" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline moved from Sweden 4 years ago.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly states that Caroline moved from Sweden 4 years ago, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "How long ago was Caroline's 18th birthday?", + "expectedAnswer": "10 years ago", + "evidence": [ + "D4:5" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline's 18th birthday was ten years prior to June 27th, 2023. 
Therefore, her 18th birthday was ten years ago from that date.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly states that Caroline's 18th birthday was ten years ago, matching the gold answer in meaning and timeframe.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What career path has Caroline decided to persue?", + "expectedAnswer": "counseling or mental health for Transgender people", + "evidence": [ + "D4:13", + "D1:11" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline has decided to pursue a career in counseling and mental health, motivated by a desire to help others with similar challenges and to give back to the community.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately reflects Caroline's decision to pursue a career in counseling and mental health, which matches the gold answer's focus on counseling or mental health for transgender people.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.5, + "evaluationMethod": "llm" + }, + { + "question": "Would Caroline still want to pursue counseling as a career if she hadn't received support growing up?", + "expectedAnswer": "Likely no", + "evidence": [ + "D4:15", + "D3:5" + ], + "category": 3, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the provided context, there is no information indicating whether Caroline would still want to pursue counseling as a career if she hadn't received support growing up. The context explicitly states that her motivation to pursue counseling is causally linked to her own mental health journey and the support she received. 
Therefore, without the experience of receiving support, it is unclear from the context whether she would have developed the same interest in counseling.", + "evaluationResult": "WRONG", + "evaluationReasoning": "{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What activities does Melanie partake in?", + "expectedAnswer": "pottery, camping, painting, swimming", + "evidence": [ + "D5:4", + "D9:1", + "D1:12", + "D1:18" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie partakes in the following activities:\n- Collaborative art activities with her family\n- Recreational activities with her children (including swimming)\n- Exploring forests as a meaningful family activity\n- Hiking in the mountains as a meaningful family activity\n- Camping trips with her family (including roasting marshmallows, telling stories around the campfire, and enjoying each other's company)\n- Playing frisbee golf as a family or recreational activity\n- Partnering with others in LGBTQ advocacy and expressing interest in upcoming LGBTQ advocacy activities", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0.25, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie sign up for a pottery class?", + "expectedAnswer": "2 July 2023", + "evidence": [ + "D5:4" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide information about when Melanie signed up for a pottery class.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer does not mention the date or time period when Melanie signed up for a pottery class, which is required by the gold answer.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "When is Caroline going to the transgender conference?", + "expectedAnswer": "July 2023", + "evidence": [ + "D5:13" + ], + "category": 2, + 
"conversationId": "locomo_1", + "generatedAnswer": "The context does not provide a specific date for when Caroline is going to the transgender conference, only that she informed Melanie on July 3rd, 2023, that she would be attending it later in July 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly identifies that Caroline is attending the transgender conference in July 2023, matching the gold answer's time frame.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "Where has Melanie camped?", + "expectedAnswer": "beach, mountains, forest", + "evidence": [ + "D6:16", + "D4:6", + "D8:32" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie has camped in the mountains, at the beach, and in the forest.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer includes all the locations mentioned in the gold answer (beach, mountains, forest), matching the required information.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What do Melanie's kids like?", + "expectedAnswer": "dinosaurs, nature", + "evidence": [ + "D6:6", + "D4:8" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie's kids like learning about animals, are enthusiastic about the dinosaur exhibit, are fascinated by bones, and enjoy creative activities such as making a dog-themed cup during a pottery workshop.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer mentions dinosaurs (dinosaur exhibit) and animals/nature, which aligns with the gold answer topics of dinosaurs and nature.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie go to the museum?", + "expectedAnswer": "5 July 2023", + "evidence": [ + "D6:4" + ], + "category": 2, + "conversationId": "locomo_1", + 
"generatedAnswer": "Melanie went to the museum on July 5th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer provides the same date as the gold answer, just in a different format.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline have a picnic?", + "expectedAnswer": "The week before 6 July 2023", + "evidence": [ + "D6:11" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline had a picnic during the week of June 26th to July 2nd, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer specifies the same week as the gold answer, just with explicit dates, so it matches the required time period.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.8, + "evaluationMethod": "llm" + }, + { + "question": "Would Caroline likely have Dr. Seuss books on her bookshelf?", + "expectedAnswer": "Yes, since she collects classic children's books", + "evidence": [ + "D6:9" + ], + "category": 3, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the provided context, it is likely that Caroline would have Dr. Seuss books on her bookshelf. The context specifies that her children's book collection includes classics, stories from different cultures, and educational books. Dr. Seuss books are widely considered children's classics and are often included in thoughtfully curated children's libraries aimed at fostering intellectual growth and curiosity. Therefore, it is reasonable to conclude that Dr. Seuss books would fit within the types of books Caroline collects for her future children.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly infers that Caroline would likely have Dr. 
Seuss books because she collects classic children's books, matching the reasoning in the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.7142857142857143, + "evaluationMethod": "llm" + }, + { + "question": "What books has Melanie read?", + "expectedAnswer": "\"Nothing is Impossible\", \"Charlotte's Web\"", + "evidence": [ + "D7:8", + "D6:10" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the provided context, Melanie has read \"Charlotte's Web\" as a child and is currently reading a book recommended by Caroline (the specific title of this book is not given in the context).", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer only mentions \"Charlotte's Web\" and an unspecified book, but omits \"Nothing is Impossible,\" which is specifically listed in the gold answer.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0.5, + "evaluationMethod": "llm" + }, + { + "question": "What does Melanie do to destress?", + "expectedAnswer": "Running, pottery", + "evidence": [ + "D7:22", + "D5:4" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie destresses by engaging in activities such as running, reading, playing the violin, painting, swimming, and pottery. 
She also participates in family activities to relax and recover emotionally, especially after events like road trips.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline go to the LGBTQ conference?", + "expectedAnswer": "10 July 2023", + "evidence": [ + "D7:1" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline attended the LGBTQ conference on July 10th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer matches the gold answer in both date and event, just with slightly different wording.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie read the book \"nothing is impossible\"?", + "expectedAnswer": 2022, + "evidence": [ + "D7:8" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide information about when Melanie read the book \"nothing is impossible.\"", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer does not mention the year 2022 or provide any information about when Melanie read the book, so it does not match the gold answer.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "Would Caroline pursue writing as a career option?", + "expectedAnswer": "LIkely no; though she likes reading, she wants to be a counselor", + "evidence": [ + "D7:5", + "D7:9" + ], + "category": 3, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide information indicating that Caroline would pursue writing as a career option. 
It specifically states that she is exploring career options related to counseling and mental health.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly states that Caroline is interested in counseling and mental health, not writing, which aligns with the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.2222222222222222, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline go to the adoption meeting?", + "expectedAnswer": "The friday before 15 July 2023", + "evidence": [ + "D8:9" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline went to the adoption meeting on July 14th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer specifies July 14th, 2023, which is the Friday before July 15th, 2023, matching the gold answer's time frame.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.6, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie go to the pottery workshop?", + "expectedAnswer": "The Friday before 15 July 2023", + "evidence": [ + "D8:2" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie went to the pottery workshop on July 14th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer specifies July 14th, 2023, which is the Friday before July 15th, 2023, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.6, + "evaluationMethod": "llm" + }, + { + "question": "Would Melanie be considered a member of the LGBTQ community?", + "expectedAnswer": "Likely no, she does not refer to herself as part of it", + "evidence": [], + "category": 3, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide enough information to determine whether Melanie is a member of the LGBTQ community. 
It only describes Melanie's supportive actions and encouragement toward Caroline's involvement in LGBTQ+ events, but does not specify Melanie's own identity or membership in the community.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0.25, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie go camping in June?", + "expectedAnswer": "The week before 27 June 2023", + "evidence": [ + "D4:8" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide information about Melanie going camping in June.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer incorrectly states that there is no information about Melanie going camping in June, while the gold answer specifies the week before 27 June 2023.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0.4, + "evaluationMethod": "llm" + }, + { + "question": "What LGBTQ+ events has Caroline participated in?", + "expectedAnswer": "Pride parade, school speech, support group", + "evidence": [ + "D5:1", + "D8:17", + "D3:1", + "D1:3" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline has participated in the following LGBTQ+ events:\n- A mentorship program for LGBTQ youth on the weekend of July 15th-16th, 2023.\n- A transgender poetry reading event where transgender people shared their stories through poetry (shared on October 13th, 2023).\n- A pride parade (Caroline shared a photo of the event on July 15th, 2023).", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer includes \"pride parade,\" which matches one of the gold answer events, and while it doesn't mention the school speech or support group, it provides other LGBTQ+ events Caroline participated in, which is sufficient for a correct label.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.16666666666666666, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline 
go to a pride parade during the summer?", + "expectedAnswer": "The week before 3 July 2023", + "evidence": [ + "D5:1" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline went to a pride parade during the summer on August 11th, 2023.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer gives a date (August 11th, 2023) that is more than a month after the gold answer (the week before July 3, 2023), so it does not match the referenced time period.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0.4, + "evaluationMethod": "llm" + }, + { + "question": "What events has Caroline participated in to help children?", + "expectedAnswer": "Mentoring program, school speech", + "evidence": [ + "D9:2", + "D3:3" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the provided context, Caroline has participated in the following events to help children:\n\n- She joined a mentorship program for LGBTQ youth.\n- She attended a council meeting for adoption, which strengthened her determination to adopt and provide a loving home for children.\n- She organizes the Children's Talent Show.\n\nThese activities demonstrate Caroline's involvement in supporting and helping children.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer mentions a mentorship program (matching \"mentoring program\") and includes other relevant child-focused activities, which sufficiently covers the gold answer's topics. \n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie go camping in July?", + "expectedAnswer": "two weekends before 17 July 2023", + "evidence": [ + "D9:1" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not specify that Melanie went camping in July. 
It only states that on June 27th, 2023, Melanie said she had recently taken her family camping during the previous week, which would place the camping trip in June, not July.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly points out that there is no information about Melanie camping in July and clarifies the actual timing based on the context.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.4, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline join a mentorship program?", + "expectedAnswer": "The weekend before 17 July 2023", + "evidence": [ + "D9:2" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline joined a mentorship program on the weekend of July 15th-16th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer specifies the weekend of July 15th-16th, 2023, which matches the gold answer's \"the weekend before 17 July 2023.\"\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.8, + "evaluationMethod": "llm" + }, + { + "question": "What did Melanie paint recently?", + "expectedAnswer": "sunset", + "evidence": [ + "D8:6; D9:17" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie recently painted a sunflower.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer says Melanie painted a sunflower, while the gold answer says she painted a sunset, so the topics do not match.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What activities has Melanie done with her family?", + "expectedAnswer": "Pottery, painting, camping, museum, swimming, hiking", + "evidence": [ + "D8:4", + "D8:6", + "D9:1", + "D6:4", + "D1:18", + "D3:14" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie has gone hiking in the mountains, explored forests, and engaged in collaborative art activities with her family.", + 
"evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0.16666666666666666, + "evaluationMethod": "llm" + }, + { + "question": "In what ways is Caroline participating in the LGBTQ community?", + "expectedAnswer": "Joining activist group, going to pride parades, participating in an art show, mentoring program", + "evidence": [ + "D10:3", + "D5:1", + "D9:12", + "D9:2" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline is participating in the LGBTQ community by attending and documenting live literary gatherings focused on trans voices, such as transgender poetry readings; joining and actively engaging in an LGBTQ activist group; providing mentorship; participating in support groups, conferences, and advocacy activities; and supporting and uplifting trans identities through her involvement in meaningful community experiences.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0.36363636363636365, + "evaluationMethod": "llm" + }, + { + "question": "How many times has Melanie gone to the beach in 2023?", + "expectedAnswer": 2, + "evidence": [ + "D10:8", + "D6:16" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide specific information about how many times Melanie has gone to the beach in 2023, only that her family typically visits once or twice a year.", + "evaluationResult": "WRONG", + "evaluationReasoning": "{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline join a new activist group?", + "expectedAnswer": "The Tuesday before 20 July 2023", + "evidence": [ + "D10:3" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline joined a new activist group on July 18th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer specifies July 18th, 2023, which is the Tuesday 
before July 20, 2023, matching the gold answer's time frame.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.4, + "evaluationMethod": "llm" + }, + { + "question": "Would Melanie be more interested in going to a national park or a theme park?", + "expectedAnswer": "National park; she likes the outdoors", + "evidence": [ + "D10:12", + "D10:14" + ], + "category": 3, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the provided context, Melanie would be more interested in going to a national park. The context highlights her appreciation for nature, enjoyment of outdoor activities such as camping, and her focus on nature-themed art, all of which align more closely with the experiences offered by a national park rather than a theme park.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly identifies that Melanie would prefer a national park and provides supporting reasons consistent with the gold answer. \n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.3333333333333333, + "evaluationMethod": "llm" + }, + { + "question": "What kind of art does Caroline make?", + "expectedAnswer": "abstract art", + "evidence": [ + "D11:12", + "D11:8", + "D9:14" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline makes paintings, including abstract art, nature-themed works, self-portraits, advocacy-themed pieces, and public art such as rainbow-inspired sidewalk paintings. Her art often focuses on storytelling, LGBTQ+ advocacy, and fostering community.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer explicitly mentions that Caroline makes abstract art, which matches the gold answer, and provides additional relevant details. 
\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "When is Melanie's daughter's birthday?", + "expectedAnswer": "13 August", + "evidence": [ + "D11:1" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the provided context, Melanie's daughter's birthday is on August 13th.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly identifies Melanie's daughter's birthday as August 13th, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline attend a pride parade in August?", + "expectedAnswer": "The Friday before 14 August 2023", + "evidence": [ + "D11:4" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline attended a pride parade on August 11th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer \"August 11th, 2023\" is the Friday before August 14th, 2023, matching the gold answer's time frame.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.4, + "evaluationMethod": "llm" + }, + { + "question": "Would Melanie be considered an ally to the transgender community?", + "expectedAnswer": "Yes, she is supportive", + "evidence": [], + "category": 3, + "conversationId": "locomo_1", + "generatedAnswer": "Yes, based on the provided context, Melanie would be considered an ally to the transgender community. She consistently expresses support for Caroline's participation in transgender and LGBTQ+ events, encourages involvement in community activities, affirms the value of such events, engages in reflective dialogue about identity and advocacy, and partners with others in LGBTQ advocacy. 
Melanie's actions and words demonstrate ongoing encouragement, affirmation, and active engagement with the transgender and broader LGBTQ+ community.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer affirms that Melanie is supportive of the transgender community and provides additional context, which aligns with the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.6666666666666666, + "evaluationMethod": "llm" + }, + { + "question": "Who supports Caroline when she has a negative experience?", + "expectedAnswer": "Her mentors, family, and friends", + "evidence": [ + "D12:1", + "D3:11" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline is supported by her support network, including friends, family, mentors, and specifically Melanie, when she has a negative experience.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer includes all the groups mentioned in the gold answer (mentors, family, and friends) and adds a specific name, which is acceptable.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What types of pottery have Melanie and her kids made?", + "expectedAnswer": "bowls, cup", + "evidence": [ + "D12:14", + "D8:4", + "D5:6" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie's kids made a cup with a dog face on it, and Melanie made a bowl with a colorful design.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0.5, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline and Melanie go to a pride fesetival together?", + "expectedAnswer": 2022, + "evidence": [ + "D12:15" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline and Melanie attended Pride fest together the previous year (i.e., the year before 2023).", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The 
generated answer refers to \"the previous year (i.e., the year before 2023)\", which is 2022, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What would Caroline's political leaning likely be?", + "expectedAnswer": "Liberal", + "evidence": [ + "D12:1" + ], + "category": 3, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the context, Caroline is committed to LGBTQ advocacy, creating change, and building a more inclusive and understanding world. She faced adversity from religious conservatives and reflected on the need for continued work toward equality. These details suggest that Caroline's political leaning is likely progressive or liberal.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What has Melanie painted?", + "expectedAnswer": "Horse, sunset, sunrise", + "evidence": [ + "D13:8", + "D8:6", + "D1:12" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie has painted a sunflower.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer does not mention any of the subjects listed in the gold answer (horse, sunset, sunrise), so it does not match the referenced topic.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What are Melanie's pets' names?", + "expectedAnswer": "Oliver, Luna, Bailey", + "evidence": [ + "D13:4", + "D7:18" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie's pets' names are Luna, Oliver, and Bailey.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer lists all three pet names—Luna, Oliver, and Bailey—matching the gold answer, just in a different order.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline apply to 
adoption agencies?", + "expectedAnswer": "The week of 23 August 2023", + "evidence": [ + "D13:1" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline applied to adoption agencies on August 23rd, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer specifies August 23rd, 2023, which falls within the week of 23 August 2023 as stated in the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.5, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline draw a self-portrait?", + "expectedAnswer": "The week before 23 August 2023", + "evidence": [ + "D13:11" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline created a self-portrait during the week prior to August 23rd, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately matches the time frame given in the gold answer, referring to the week before August 23, 2023.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.8, + "evaluationMethod": "llm" + }, + { + "question": "What subject have Caroline and Melanie both painted?", + "expectedAnswer": "Sunsets", + "evidence": [ + "D14:5", + "D8:6" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the provided context, only Melanie is mentioned as having painted a horse (the horse painting she shared with Caroline). There is no information in the context indicating that Caroline has painted a horse or any other specific animal. 
Therefore, the context does not specify a subject that both Caroline and Melanie have painted.", + "evaluationResult": "WRONG", + "evaluationReasoning": "{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What symbols are important to Caroline?", + "expectedAnswer": "Rainbow flag, transgender symbol", + "evidence": [ + "D14:15", + "D4:1" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "The important symbols to Caroline are the rainbow (symbolizing togetherness, celebrating differences, and trans courage and strength), the clock (representing her and Melanie's shared journey and the passage of time), and the eagle (symbolizing freedom, pride, and resilience, both personally and for the trans community). The eagle mural is also especially meaningful to her as a representation of acceptance and community support.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer includes the rainbow (which corresponds to the rainbow flag) and references trans courage and strength (which relates to the transgender symbol), thus covering the key symbols from the gold answer, even though it adds more detail.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.5, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline encounter people on a hike and have a negative experience?", + "expectedAnswer": "The week before 25 August 2023", + "evidence": [ + "D14:1" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline encountered people on a hike and had a negative experience during the week of August 14th–20th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0.8, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie make a plate in pottery class?", + "expectedAnswer": "24 August 2023", + "evidence": [ + "D14:4" + ], + "category": 2, + "conversationId": "locomo_1", + 
"generatedAnswer": "Melanie made a plate in pottery class on August 24th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer provides the same date as the gold answer, just in a slightly different format.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "Would Caroline be considered religious?", + "expectedAnswer": "Somewhat, but not extremely religious", + "evidence": [ + "D14:19", + "D12:1" + ], + "category": 3, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide information about Caroline's own religious beliefs or practices. Therefore, it is not possible to determine whether Caroline would be considered religious based on the provided information.", + "evaluationResult": "WRONG", + "evaluationReasoning": "{\"label\": \"WRONG\"}", + "matchRatio": 0.4, + "evaluationMethod": "llm" + }, + { + "question": "What instruments does Melanie play?", + "expectedAnswer": "clarinet and violin", + "evidence": [ + "D15:26", + "D2:5" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie plays the violin and the clarinet.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately lists both the violin and clarinet, matching the gold answer in content.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What musical artists/bands has Melanie seen?", + "expectedAnswer": "Summer Sounds, Matt Patterson", + "evidence": [ + "D15:16", + "D11:3" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie has seen Matt Patterson and a band (name not specified) perform.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer mentions Matt Patterson (matching the gold answer) and refers to a band, though it does not specify \"Summer Sounds\" by name; however, it acknowledges the existence of a band, which is 
sufficient.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.5, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie go to the park?", + "expectedAnswer": "27 August 2023", + "evidence": [ + "D15:2" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie went to the park on August 27th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer matches the gold answer in both date and content, just with a slightly different format.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "When is Caroline's youth center putting on a talent show?", + "expectedAnswer": "September 2023", + "evidence": [ + "D15:11" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "The children's talent show organized by Caroline is taking place in the month following August 28th, 2023, which is September 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer clearly states that the talent show is in September 2023, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "Would Melanie likely enjoy the song \"The Four Seasons\" by Vivaldi?", + "expectedAnswer": "Yes; it's classical music", + "evidence": [ + "D15:28" + ], + "category": 3, + "conversationId": "locomo_1", + "generatedAnswer": "Yes, Melanie would likely enjoy the song \"The Four Seasons\" by Vivaldi, as she is a fan of classical music and appreciates both classical composers and nature-inspired themes.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly states that Melanie would likely enjoy \"The Four Seasons\" by Vivaldi because she enjoys classical music, matching the reasoning in the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.5, + "evaluationMethod": "llm" + }, + { + "question": "What are some changes Caroline has faced 
during her transition journey?", + "expectedAnswer": "Changes to her body, losing unsupportive friends", + "evidence": [ + "D16:15", + "D11:14" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Some changes Caroline has faced during her transition journey include changes to her body, shifts in her relationships (with some friends continuing to support her while others could not handle the changes), and navigating the process of self-acceptance and authenticity. These changes have influenced her sense of belonging, the authenticity and quality of her interpersonal connections, and her experience of support from friends and family.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer mentions both changes to Caroline's body and the loss of unsupportive friends, matching the key points in the gold answer, and adds some elaboration without contradicting the facts.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.6666666666666666, + "evaluationMethod": "llm" + }, + { + "question": "What does Melanie do with her family on hikes?", + "expectedAnswer": "Roast marshmallows, tell stories", + "evidence": [ + "D16:4", + "D10:12" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie and her family hike in the mountains and explore forests as meaningful ways to connect with nature and with each other.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer does not mention roasting marshmallows or telling stories, which are the specific activities referenced in the gold answer.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline go biking with friends?", + "expectedAnswer": "The weekend before 13 September 2023", + "evidence": [ + "D16:1" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline went biking with friends the previous weekend before September 12th, 2023.", + 
"evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly identifies the same weekend before mid-September 2023 as the gold answer, just referencing September 12th instead of 13th, which is effectively the same time frame.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "How long has Melanie been practicing art?", + "expectedAnswer": "Since 2016", + "evidence": [ + "D16:8" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not contain enough information to answer how long Melanie has been practicing art.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer incorrectly states that there is not enough information, while the gold answer specifies \"Since 2016,\" so the generated answer is inaccurate.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What personality traits might Melanie say Caroline has?", + "expectedAnswer": "Thoughtful, authentic, driven", + "evidence": [ + "D16:18", + "D13:16", + "D7:4" + ], + "category": 3, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the context, Melanie might say that Caroline has the following personality traits:\n\n- Strength (describing Caroline as \"strong\")\n- Inspirational qualities (describing Caroline as \"inspiring\")\n- Maturity (praising Caroline's maturity in conflict resolution)\n- Courage (praising Caroline's courage in apologizing)\n- Self-acceptance (recognizing Caroline's journey toward self-acceptance)\n- Supportiveness (acknowledging Caroline's readiness to help others)\n- Openness (noting Caroline's ongoing sharing of life updates)\n- Authenticity (affirming the value of being oneself and embracing authenticity)", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What transgender-specific 
events has Caroline attended?", + "expectedAnswer": "Poetry reading, conference", + "evidence": [ + "D17:19", + "D15:13" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline has attended a transgender poetry reading and an LGBTQ+ counseling workshop. She also planned to attend a transgender conference later in July 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer mentions both a transgender poetry reading and a transgender conference, which matches the gold answer's events.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.6666666666666666, + "evaluationMethod": "llm" + }, + { + "question": "What book did Melanie read from Caroline's suggestion?", + "expectedAnswer": "\"Becoming Nicole\"", + "evidence": [ + "D7:11", + "D17:10" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not specify the title of the book that Melanie read from Caroline's suggestion.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer fails to mention \"Becoming Nicole\" or any book title, so it does not address the referenced topic.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie's friend adopt a child?", + "expectedAnswer": 2022, + "evidence": [ + "D17:3" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie's friend adopted a child in the previous year (i.e., in 2022).", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer specifies \"the previous year (i.e., in 2022)\", which matches the gold answer of 2022.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie get hurt?", + "expectedAnswer": "September 2023", + "evidence": [ + "D17:8" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide a specific date for when 
Melanie got hurt.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer does not mention \"September 2023\" or any equivalent time reference, so it does not match the gold answer.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie's family go on a roadtrip?", + "expectedAnswer": "The weekend before 20 October 2023", + "evidence": [ + "D18:1" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide the exact date of Melanie's family's road trip. It only states that after the road trip, on October 19th, 2023, Melanie's family engaged in a relaxing activity. Therefore, the road trip occurred sometime before October 19th, 2023, but the specific date is not given.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly identifies that the road trip occurred before October 19th, 2023, which aligns with the gold answer of the weekend before October 20, 2023.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.8, + "evaluationMethod": "llm" + }, + { + "question": "How many children does Melanie have?", + "expectedAnswer": 3, + "evidence": [ + "D18:1", + "D18:7" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the provided context, Melanie has two children.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer states that Melanie has two children, while the gold answer says she has three, so the answers do not match.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie go on a hike after the roadtrip?", + "expectedAnswer": "19 October 2023", + "evidence": [ + "D18:17" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not contain information about Melanie going on a hike after the road trip.", + "evaluationResult": "WRONG", + 
"evaluationReasoning": "The generated answer incorrectly states that there is no information about Melanie's hike after the road trip, while the gold answer provides a specific date, so the generated answer is incorrect.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "Would Melanie go on another roadtrip soon?", + "expectedAnswer": "Likely no; since this one went badly", + "evidence": [ + "D18:3", + "D18:1" + ], + "category": 3, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide information about Melanie's future plans or whether she would go on another road trip soon.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer fails to reference the negative experience from the last road trip, which is key to the gold answer's reasoning.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What items has Melanie bought?", + "expectedAnswer": "Figurines, shoes", + "evidence": [ + "D19:2", + "D7:18" + ], + "category": 1, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie has bought wooden dolls and pink sneakers.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer refers to \"wooden dolls\" (a type of figurine) and \"pink sneakers\" (a type of shoes), matching the gold answer's topics.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "When did Caroline pass the adoption interview?", + "expectedAnswer": "The Friday before 22 October 2023", + "evidence": [ + "D19:1" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline passed the adoption interview on October 13th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer specifies October 13th, 2023, which is the Friday before October 22, 2023, matching the gold answer's time frame.\n\n{\"label\": \"CORRECT\"}", + 
"matchRatio": 0.6, + "evaluationMethod": "llm" + }, + { + "question": "When did Melanie buy the figurines?", + "expectedAnswer": "21 October 2023", + "evidence": [ + "D19:2" + ], + "category": 2, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie bought the figurines on October 21st, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer gives the same date as the gold answer, just in a slightly different format.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "Would Caroline want to move back to her home country soon?", + "expectedAnswer": "No; she's in the process of adopting children.", + "evidence": [ + "D19:1", + "D19:3" + ], + "category": 3, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide information about whether Caroline would want to move back to her home country soon.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer fails to mention Caroline's adoption process, which is the key reason given in the gold answer for her not wanting to move back, and incorrectly claims there is no information provided.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0.16666666666666666, + "evaluationMethod": "llm" + }, + { + "question": "What did the charity race raise awareness for?", + "expectedAnswer": "mental health", + "evidence": [ + "D2:2" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The charity race raised awareness for mental health.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer directly matches the gold answer by stating that the charity race raised awareness for mental health.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What did Melanie realize after the charity race?", + "expectedAnswer": "self-care is important", + "evidence": [ + "D2:3" + ], + "category": 4, + "conversationId": 
"locomo_1", + "generatedAnswer": "The context does not contain information about a charity race or what Melanie realized after it.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer does not mention self-care or what Melanie realized after the charity race, and instead states that the information is not present.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "How does Melanie prioritize self-care?", + "expectedAnswer": "by carving out some me-time each day for activities like running, reading, or playing the violin", + "evidence": [ + "D2:5" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie prioritizes self-care by carving out daily \"me-time\" for activities such as running, reading, and playing the violin. She recognizes self-care as an ongoing journey and connects these practices to her ability to better care for her family. Melanie is committed to continuing these self-care habits for her well-being, and she receives affirmation and support from Caroline, with whom she shares ongoing mutual encouragement regarding self-care.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately describes Melanie's approach to self-care by mentioning daily \"me-time\" and the specific activities listed in the gold answer, while providing some additional context.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.7142857142857143, + "evaluationMethod": "llm" + }, + { + "question": "What are Caroline's plans for the summer?", + "expectedAnswer": "researching adoption agencies", + "evidence": [ + "D2:8" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the provided context, Caroline's plans for the summer include collaboratively planning and participating in a special one-on-one outing with Melanie, focused on their friendship and creating a unique experience together.", + "evaluationResult": 
"WRONG", + "evaluationReasoning": "{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What type of individuals does the adoption agency Caroline is considering support?", + "expectedAnswer": "LGBTQ+ individuals", + "evidence": [ + "D2:12" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The adoption agency Caroline is considering supports LGBTQ+ individuals.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer directly matches the gold answer by stating that the adoption agency supports LGBTQ+ individuals.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "Why did Caroline choose the adoption agency?", + "expectedAnswer": "because of their inclusivity and support for LGBTQ+ individuals", + "evidence": [ + "D2:12" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline chose the adoption agency because of its inclusivity and support for LGBTQ+ individuals seeking adoption.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately matches the gold answer by stating that Caroline chose the agency due to its inclusivity and support for LGBTQ+ individuals.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.875, + "evaluationMethod": "llm" + }, + { + "question": "What is Caroline excited about in the adoption process?", + "expectedAnswer": "creating a family for kids who need one", + "evidence": [ + "D2:14" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline is excited to begin the new chapter of adopting and providing a safe, loving home for children, which has been a long-held dream for her.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0.14285714285714285, + "evaluationMethod": "llm" + }, + { + "question": "What does Melanie think about Caroline's decision to adopt?", + 
"expectedAnswer": "she thinks Caroline is doing something amazing and will be an awesome mom", + "evidence": [ + "D2:15" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie is proud of Caroline's decision to adopt, affirms and supports her step toward building a family through adoption, expresses enthusiasm for her progress in the adoption journey, and recognizes Caroline's commitment to adoption.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0.2, + "evaluationMethod": "llm" + }, + { + "question": "How long have Mel and her husband been married?", + "expectedAnswer": "Mel and her husband have been married for 5 years.", + "evidence": [ + "D3:16" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Mel and her husband have been married for five years as of June 9th, 2023.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly states that Mel and her husband have been married for five years, matching the gold answer in content.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.8888888888888888, + "evaluationMethod": "llm" + }, + { + "question": "What does Caroline's necklace symbolize?", + "expectedAnswer": "love, faith, and strength", + "evidence": [ + "D4:3" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline's necklace symbolizes love, faith, and strength. 
It also serves as a reminder of her roots, her Swedish heritage, and the love and support she receives from her family.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer includes the key elements of the gold answer—love, faith, and strength—so it covers the required symbolism.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What country is Caroline's grandma from?", + "expectedAnswer": "Sweden", + "evidence": [ + "D4:3" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Sweden", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer exactly matches the gold answer and correctly identifies the country as Sweden.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What was grandma's gift to Caroline?", + "expectedAnswer": "necklace", + "evidence": [ + "D4:3" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Grandma's gift to Caroline was the necklace with a cross and a heart.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly identifies the gift as a necklace, matching the gold answer and providing additional detail.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What is Melanie's hand-painted bowl a reminder of?", + "expectedAnswer": "art and self-expression", + "evidence": [ + "D4:5" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie's hand-painted bowl is a reminder of her creative inspiration and fulfillment found in painting and pottery, her emotional satisfaction and calm derived from these activities, her pride in her artistic achievements, and the connection between pottery and themes of self-discovery and acceptance.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer elaborates on the themes of 
art, self-expression, and creativity, which aligns with the gold answer's focus on art and self-expression.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.6666666666666666, + "evaluationMethod": "llm" + }, + { + "question": "What did Melanie and her family do while camping?", + "expectedAnswer": "explored nature, roasted marshmallows, and went on a hike", + "evidence": [ + "D4:8" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "While camping, Melanie and her family roasted marshmallows, told stories around the campfire, and enjoyed each other's company.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer mentions roasting marshmallows and enjoying time together, which partially overlaps with the gold answer, but it does not mention exploring nature or hiking, missing key activities.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0.42857142857142855, + "evaluationMethod": "llm" + }, + { + "question": "What kind of counseling and mental health services is Caroline interested in pursuing?", + "expectedAnswer": "working with trans people, helping them accept themselves and supporting their mental health", + "evidence": [ + "D4:13" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not specify the exact kind of counseling and mental health services Caroline is interested in pursuing. 
It only states that she is motivated by her own mental health journey and desires to help others with similar challenges, but does not detail a specific type or area within counseling or mental health services.", + "evaluationResult": "WRONG", + "evaluationReasoning": "{\"label\": \"WRONG\"}", + "matchRatio": 0.3076923076923077, + "evaluationMethod": "llm" + }, + { + "question": "What workshop did Caroline attend recently?", + "expectedAnswer": "LGBTQ+ counseling workshop", + "evidence": [ + "D4:13" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline recently attended an LGBTQ+ counseling workshop.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer matches the gold answer exactly, mentioning the LGBTQ+ counseling workshop attended by Caroline.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What was discussed in the LGBTQ+ counseling workshop?", + "expectedAnswer": "therapeutic methods and how to best work with trans people", + "evidence": [ + "D4:13" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Therapeutic methods for working with trans people were discussed in the LGBTQ+ counseling workshop.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately mentions the discussion of therapeutic methods for working with trans people, which matches the key topics in the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.6666666666666666, + "evaluationMethod": "llm" + }, + { + "question": "What motivated Caroline to pursue counseling?", + "expectedAnswer": "her own journey and the support she received, and how counseling improved her life", + "evidence": [ + "D4:15" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline was motivated to pursue counseling by her own struggles with mental health, the support she received during her journey, her positive 
experiences with counseling and support groups, and her desire to help people who have experienced similar challenges by providing support, connection, and safe spaces for growth.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0.7142857142857143, + "evaluationMethod": "llm" + }, + { + "question": "What kind of place does Caroline want to create for people?", + "expectedAnswer": "a safe and inviting place for people to grow", + "evidence": [ + "D4:15" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline wants to create a safe, supportive, and inclusive space where people can grow, express themselves authentically, and feel empowered.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer captures the essence of creating a safe and inviting place for people to grow, matching the intent of the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.5714285714285714, + "evaluationMethod": "llm" + }, + { + "question": "Did Melanie make the black and white bowl in the photo?", + "expectedAnswer": "Yes", + "evidence": [ + "D5:8" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Yes, Melanie made the black and white bowl in the photo.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer directly confirms that Melanie made the black and white bowl, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What kind of books does Caroline have in her library?", + "expectedAnswer": "kids' books - classics, stories from different cultures, educational books", + "evidence": [ + "D6:9" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline's library includes classics, stories from different cultures, and educational books.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer 
accurately lists the same types of books as the gold answer, covering classics, stories from different cultures, and educational books, which matches the intent and content.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.8888888888888888, + "evaluationMethod": "llm" + }, + { + "question": "What was Melanie's favorite book from her childhood?", + "expectedAnswer": "\"Charlotte's Web\"", + "evidence": [ + "D6:10" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie's favorite book from her childhood was \"Charlotte's Web.\"", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer directly matches the gold answer by stating that Melanie's favorite childhood book was \"Charlotte's Web.\"\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.5, + "evaluationMethod": "llm" + }, + { + "question": "What book did Caroline recommend to Melanie?", + "expectedAnswer": "\"Becoming Nicole\"", + "evidence": [ + "D7:11" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide the title or name of the book that Caroline recommended to Melanie.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer does not mention \"Becoming Nicole\" or any book recommendation, so it does not address the question.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What did Caroline take away from the book \"Becoming Nicole\"?", + "expectedAnswer": "Lessons on self-acceptance and finding support", + "evidence": [ + "D7:13" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline found \"Becoming Nicole\" by Amy Ellis Nutt especially inspiring and hopeful for her own path as a trans woman. The true story of a trans girl and her family gave her a sense of connection and hope, reinforcing her resilience and optimism. 
Caroline strongly recommends the book for its inspirational value, as it deepened her sense of self-acceptance and advocacy.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer mentions self-acceptance and support (through connection, hope, and advocacy), which aligns with the gold answer's key points.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.4, + "evaluationMethod": "llm" + }, + { + "question": "What are the new shoes that Melanie got used for?", + "expectedAnswer": "Running", + "evidence": [ + "D7:19" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The new shoes that Melanie got are used for running.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer directly states that Melanie's new shoes are used for running, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What is Melanie's reason for getting into running?", + "expectedAnswer": "To de-stress and clear her mind", + "evidence": [ + "D7:21" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie got into running as a self-care practice to destress, clear her mind, and support her emotional well-being, which helps her refresh herself and maintain presence for her family.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0.8, + "evaluationMethod": "llm" + }, + { + "question": "What does Melanie say running has been great for?", + "expectedAnswer": "Her mental health", + "evidence": [ + "D7:24" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie says running has been great for destressing and clearing her mind.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer mentions that running helps Melanie destress and clear her mind, which aligns with the idea of benefiting her mental health.\n\n{\"label\": 
\"CORRECT\"}", + "matchRatio": 0.3333333333333333, + "evaluationMethod": "llm" + }, + { + "question": "What did Mel and her kids make during the pottery workshop?", + "expectedAnswer": "pots", + "evidence": [ + "D8:2" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie and her kids made a dog-themed cup out of clay during the pottery workshop.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer specifies that Mel and her kids made a cup (which is a type of pot) out of clay, aligning with the gold answer's reference to \"pots.\"\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What kind of pot did Mel and her kids make with clay?", + "expectedAnswer": "a cup with a dog face on it", + "evidence": [ + "D8:4" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie made a bowl, and her children made a cup with a dog face on it, both using clay.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly mentions that Mel's children made a cup with a dog face on it, matching the gold answer's topic.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What creative project do Mel and her kids do together besides pottery?", + "expectedAnswer": "painting", + "evidence": [ + "D8:5" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie and her kids create collaborative paintings together, particularly nature-inspired artwork.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer mentions that Melanie and her kids create collaborative paintings, which matches the gold answer of \"painting.\"\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What did Mel and her kids paint in their latest project in July 2023?", + "expectedAnswer": "a sunset with a 
palm tree", + "evidence": [ + "D8:6" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not contain enough information to answer the question about what Mel and her kids painted in their latest project in July 2023.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer does not mention \"a sunset with a palm tree\" or any details about what Mel and her kids painted, so it does not address the question.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What did Caroline see at the council meeting for adoption?", + "expectedAnswer": "many people wanting to create loving homes for children in need", + "evidence": [ + "D8:9" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline saw a number of people seeking to create loving homes for children in need at the council meeting for adoption.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately reflects the key point of the gold answer, mentioning people seeking to create loving homes for children in need at the adoption council meeting.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.7777777777777778, + "evaluationMethod": "llm" + }, + { + "question": "What do sunflowers represent according to Caroline?", + "expectedAnswer": "warmth and happiness", + "evidence": [ + "D8:11" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "According to Caroline, sunflowers represent warmth and happiness.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer matches the gold answer exactly, correctly identifying what sunflowers represent according to Caroline.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "Why are flowers important to Melanie?", + "expectedAnswer": "They remind her to appreciate the small moments and were a part of her wedding 
decor", + "evidence": [ + "D8:12" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Flowers are important to Melanie because they bring her joy, symbolize growth and beauty, and remind her to appreciate small moments. Additionally, flowers hold personal significance for her as they were an important part of her wedding decor and always remind her of her wedding day.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What inspired Caroline's painting for the art show?", + "expectedAnswer": "visiting an LGBTQ center and wanting to capture unity and strength", + "evidence": [ + "D9:16" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline's painting for the art show was inspired by her visit to an LGBTQ center, with the intention of capturing the unity and strength of everyone there.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately reflects the inspiration (visiting an LGBTQ center and wanting to capture unity and strength) as stated in the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.6666666666666666, + "evaluationMethod": "llm" + }, + { + "question": "How often does Melanie go to the beach with her kids?", + "expectedAnswer": "once or twice a year", + "evidence": [ + "D10:10" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie goes to the beach with her kids once or twice a year.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer matches the gold answer exactly in both frequency and context.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What did Melanie and her family see during their camping trip last year?", + "expectedAnswer": "Perseid meteor shower", + "evidence": [ + "D10:14" + ], + "category": 4, + "conversationId": 
"locomo_1", + "generatedAnswer": "Melanie and her family saw the Perseid meteor shower during their camping trip last year.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "How did Melanie feel while watching the meteor shower?", + "expectedAnswer": "in awe of the universe", + "evidence": [ + "D10:18" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie felt a sense of unity with the universe while watching the meteor shower.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer captures a similar emotional response of wonder and connection to the universe as the gold answer's \"in awe of the universe.\" \n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.6666666666666666, + "evaluationMethod": "llm" + }, + { + "question": "Whose birthday did Melanie celebrate recently?", + "expectedAnswer": "Melanie's daughter", + "evidence": [ + "D11:1" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie recently celebrated her daughter's birthday.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly states that Melanie celebrated her daughter's birthday, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.5, + "evaluationMethod": "llm" + }, + { + "question": "Who performed at the concert at Melanie's daughter's birthday?", + "expectedAnswer": "Matt Patterson", + "evidence": [ + "D11:3" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Matt Patterson performed at the concert at Melanie's daughter's birthday.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly identifies Matt Patterson as the performer, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "Why did Melanie choose to use 
colors and patterns in her pottery project?", + "expectedAnswer": "She wanted to catch the eye and make people smile.", + "evidence": [ + "D12:6" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not contain information about why Melanie chose to use colors and patterns in her pottery project.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer states that the context does not provide the reason, which does not address the gold answer's explanation about catching the eye and making people smile.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0.2222222222222222, + "evaluationMethod": "llm" + }, + { + "question": "What pet does Caroline have?", + "expectedAnswer": "guinea pig", + "evidence": [ + "D13:3" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline has a guinea pig named Oscar.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly states that Caroline has a guinea pig, matching the gold answer and even providing the pet's name for extra detail.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What pets does Melanie have?", + "expectedAnswer": "Two cats and a dog", + "evidence": [ + "D13:4" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie has a dog, a cat, and three pets named Luna, Oliver, and Bailey.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer mentions a dog and a cat, but does not mention a second cat, and instead lists three pets by name, which does not match the gold answer of two cats and a dog.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0.5, + "evaluationMethod": "llm" + }, + { + "question": "Where did Oliver hide his bone once?", + "expectedAnswer": "In Melanie's slipper", + "evidence": [ + "D13:6" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Oliver hid his bone in 
a slipper.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly identifies that Oliver hid his bone in a slipper, which matches the key detail from the gold answer, even though it omits \"Melanie's\" specifically.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.5, + "evaluationMethod": "llm" + }, + { + "question": "What activity did Caroline used to do with her dad?", + "expectedAnswer": "Horseback riding", + "evidence": [ + "D13:7" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline used to go horseback riding with her dad.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer matches the gold answer by stating that Caroline used to go horseback riding with her dad.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What did Caroline make for a local church?", + "expectedAnswer": "a stained glass window", + "evidence": [ + "D14:17" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline made a stained glass window for a local church.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer matches the gold answer exactly, correctly identifying that Caroline made a stained glass window for a local church.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "What did Caroline find in her neighborhood during her walk?", + "expectedAnswer": "a rainbow sidewalk", + "evidence": [ + "D14:23" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline found a rainbow-painted sidewalk in her neighborhood during her walk.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately mentions that Caroline found a rainbow-painted sidewalk, which matches the gold answer's \"a rainbow sidewalk.\"\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + 
}, + { + "question": "Which song motivates Caroline to be courageous?", + "expectedAnswer": "Brave by Sara Bareilles", + "evidence": [ + "D15:23" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The song \"Brave\" by Sara Bareilles motivates Caroline to be courageous.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly identifies \"Brave\" by Sara Bareilles as the song that motivates Caroline to be courageous, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "Which classical musicians does Melanie enjoy listening to?", + "expectedAnswer": "Bach and Mozart", + "evidence": [ + "D15:28" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie enjoys listening to classical composers such as Bach and Mozart.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer correctly mentions both Bach and Mozart as classical musicians Melanie enjoys, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "Who is Melanie a fan of in terms of modern music?", + "expectedAnswer": "Ed Sheeran", + "evidence": [ + "D15:28" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie is a fan of Ed Sheeran in terms of modern music.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer directly matches the gold answer by stating that Melanie is a fan of Ed Sheeran in terms of modern music.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 1, + "evaluationMethod": "llm" + }, + { + "question": "How long has Melanie been creating art?", + "expectedAnswer": "7 years", + "evidence": [ + "D16:7" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide information about how long Melanie has been creating art.", + "evaluationResult": 
"WRONG", + "evaluationReasoning": "The generated answer does not mention the duration of 7 years or provide any information about how long Melanie has been creating art.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What precautionary sign did Melanie see at the café?", + "expectedAnswer": "A sign stating that someone is not being able to leave", + "evidence": [ + "D16:16" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide the exact wording or specific details of the precautionary sign that Melanie saw at the café. It only mentions that Melanie shared a photo of a sign posted on a door, referenced its message about someone not being able to leave, and clarified that the sign was just a precaution.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\n \"label\": \"CORRECT\"\n}", + "matchRatio": 0.875, + "evaluationMethod": "llm" + }, + { + "question": "What advice does Caroline give for getting started with adoption?", + "expectedAnswer": "Do research, find an adoption agency or lawyer, gather necessary documents, and prepare emotionally.", + "evidence": [ + "D17:7" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline advises researching and selecting an adoption agency or lawyer, gathering required documents such as references, financial information, and medical checks, and preparing emotionally for the challenges of the waiting period.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0.45454545454545453, + "evaluationMethod": "llm" + }, + { + "question": "What setback did Melanie face in October 2023?", + "expectedAnswer": "She got hurt and had to take a break from pottery.", + "evidence": [ + "D17:8" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie faced an injury in October 2023, which required her to take a break from 
pottery.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately states that Melanie was injured in October 2023 and had to take a break from pottery, matching the gold answer in both content and detail.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.4444444444444444, + "evaluationMethod": "llm" + }, + { + "question": "What does Melanie do to keep herself busy during her pottery break?", + "expectedAnswer": "Read a book and paint.", + "evidence": [ + "D17:10" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "During her pottery break, Melanie keeps herself busy by painting and reading a book that Caroline recommended.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer mentions both painting and reading a book, which matches the activities listed in the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.75, + "evaluationMethod": "llm" + }, + { + "question": "What painting did Melanie show to Caroline on October 13, 2023?", + "expectedAnswer": "A painting inspired by sunsets with a pink sky.", + "evidence": [ + "D17:12" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "On October 13, 2023, Melanie showed Caroline a painting on a wall with a blue background and also invited Caroline to view her abstract painting.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer does not mention a painting inspired by sunsets with a pink sky, instead describing a painting with a blue background and an abstract painting, which does not match the gold answer.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0.3333333333333333, + "evaluationMethod": "llm" + }, + { + "question": "What kind of painting did Caroline share with Melanie on October 13, 2023?", + "expectedAnswer": "An abstract painting with blue streaks on a wall.", + "evidence": [ + "D17:14" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The 
context does not contain information about Caroline sharing any painting with Melanie on October 13, 2023. It only mentions Melanie sharing a painting with Caroline on that date.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer correctly states that there is no information about Caroline sharing a painting with Melanie on that date, which means it does not provide the referenced topic from the gold answer.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0.3333333333333333, + "evaluationMethod": "llm" + }, + { + "question": "What was the poetry reading that Caroline attended about?", + "expectedAnswer": "It was a transgender poetry reading where transgender people shared their stories.", + "evidence": [ + "D17:18" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The poetry reading that Caroline attended was a transgender poetry reading where transgender people shared their stories through poetry, providing a safe space for self-expression and celebrating trans identities.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately captures the main topic of the poetry reading as a transgender poetry reading where transgender people shared their stories, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.9, + "evaluationMethod": "llm" + }, + { + "question": "What did the posters at the poetry reading say?", + "expectedAnswer": "\"Trans Lives Matter\"", + "evidence": [ + "D17:19" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide the specific text of the posters at the poetry reading; it only states that the posters conveyed pride and strength.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer does not mention the specific text \"Trans Lives Matter\" as in the gold answer, so it does not address the question directly.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0, + 
"evaluationMethod": "llm" + }, + { + "question": "What does Caroline's drawing symbolize for her?", + "expectedAnswer": "Freedom and being true to herself.", + "evidence": [ + "D17:23" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Caroline's drawing symbolizes freedom, authenticity, and womanhood. It serves as a reminder for her to always stay true to herself and embrace her identity as a woman.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer includes the key concepts of freedom and being true to herself, matching the gold answer, and adds some extra detail without contradicting it.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.6, + "evaluationMethod": "llm" + }, + { + "question": "How do Melanie and Caroline describe their journey through life together?", + "expectedAnswer": "An ongoing adventure of learning and growing.", + "evidence": [ + "D17:25" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie and Caroline describe their journey through life together as a mutual adventure of learning, growth, and self-acceptance, characterized by authenticity, open communication, and shared exploration.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0.6, + "evaluationMethod": "llm" + }, + { + "question": "What happened to Melanie's son on their road trip?", + "expectedAnswer": "He got into an accident", + "evidence": [ + "D18:1" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie's son was involved in a car accident during their road trip on the weekend of October 14th-15th, 2023, but he was unharmed.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately states that Melanie's son was involved in a car accident during the road trip, matching the gold answer's key detail.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.3333333333333333, + 
"evaluationMethod": "llm" + }, + { + "question": "How did Melanie's son handle the accident?", + "expectedAnswer": "He was scared but reassured by his family", + "evidence": [ + "D18:6", + "D18:7" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not provide specific information about how Melanie's son personally handled the accident. It only states that he was involved in a car accident, was unharmed, and that Melanie's children (in general) demonstrated resilience and were characterized as tough in the aftermath.", + "evaluationResult": "WRONG", + "evaluationReasoning": "{\"label\": \"WRONG\"}", + "matchRatio": 0.16666666666666666, + "evaluationMethod": "llm" + }, + { + "question": "How did Melanie feel about her family after the accident?", + "expectedAnswer": "They are important and mean the world to her", + "evidence": [ + "D18:5" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the context, after the accident, Melanie felt that her family was loving and supportive, and she affirmed that their love and support helped her through tough times. 
She also provided reassurance and emotional support to her children, highlighting the family's resilience and her positive feelings about their ability to cope together.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer emphasizes Melanie's positive feelings toward her family, their importance, and the support they provided, which aligns with the gold answer's sentiment that they mean the world to her.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.375, + "evaluationMethod": "llm" + }, + { + "question": "How did Melanie's children handle the accident?", + "expectedAnswer": "They were scared but resilient", + "evidence": [ + "D18:7" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie's children were initially scared following their brother's car accident, but they demonstrated resilience and were characterized by Melanie as tough. With reassurance and emotional support from Melanie and the family, they were able to cope with the adversity in the aftermath of the accident.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately reflects that Melanie's children were scared but resilient, matching the key points of the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.8, + "evaluationMethod": "llm" + }, + { + "question": "How did Melanie feel after the accident?", + "expectedAnswer": "Grateful and thankful for her family", + "evidence": [ + "D18:5" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Based on the provided context, Melanie felt concerned for her children after the accident but took on a reassuring and supportive role, providing emotional support to help them cope. 
She characterized her children as tough and focused on reassuring them that their brother would be okay, highlighting her resilience and commitment to her family's emotional well-being.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0.6666666666666666, + "evaluationMethod": "llm" + }, + { + "question": "What was Melanie's reaction to her children enjoying the Grand Canyon?", + "expectedAnswer": "She was happy and thankful", + "evidence": [ + "D18:5" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "The context does not contain information about Melanie's reaction to her children enjoying the Grand Canyon.", + "evaluationResult": "WRONG", + "evaluationReasoning": "The generated answer states that there is no information about Melanie's reaction, which does not address the gold answer indicating she was happy and thankful.\n\n{\"label\": \"WRONG\"}", + "matchRatio": 0.2, + "evaluationMethod": "llm" + }, + { + "question": "What do Melanie's family give her?", + "expectedAnswer": "Strength and motivation", + "evidence": [ + "D18:9" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie's family give her active support, love, significant help, ongoing emotional support, and are her primary source of happiness and motivation.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "{\"label\": \"CORRECT\"}", + "matchRatio": 0.6666666666666666, + "evaluationMethod": "llm" + }, + { + "question": "How did Melanie feel about her family supporting her?", + "expectedAnswer": "She appreciated them a lot", + "evidence": [ + "D18:13" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie felt grateful and positively impacted by her family's support, describing their love and support as helping her through tough times and providing her with significant help. 
She identifies her family as her primary source of happiness, motivation, and resilience.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer accurately reflects Melanie's appreciation and gratitude for her family's support, which aligns with the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0.25, + "evaluationMethod": "llm" + }, + { + "question": "What did Melanie do after the road trip to relax?", + "expectedAnswer": "Went on a nature walk or hike", + "evidence": [ + "D18:17" + ], + "category": 4, + "conversationId": "locomo_1", + "generatedAnswer": "Melanie engaged in a family activity that her kids loved as a way to relax and recover emotionally after the road trip.", + "evaluationResult": "WRONG", + "evaluationReasoning": "{\"label\": \"WRONG\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What did Caroline realize after her charity race?", + "evidence": [ + "D2:3" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What are Melanie's plans for the summer with respect to adoption?", + "evidence": [ + "D2:8" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What type of individuals does the adoption agency Melanie is considering support?", + "evidence": [ + "D2:12" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read 
properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "Why did Melanie choose the adoption agency?", + "evidence": [ + "D2:12" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What is Melanie excited about in her adoption process?", + "evidence": [ + "D2:14" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What does Melanie's necklace symbolize?", + "evidence": [ + "D4:3" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What country is Melanie's grandma from?", + "evidence": [ + "D4:3" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot 
read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What was grandma's gift to Melanie?", + "evidence": [ + "D4:3" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What was grandpa's gift to Caroline?", + "evidence": [ + "D4:3" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What is Caroline's hand-painted bowl a reminder of?", + "evidence": [ + "D4:5" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What did Caroline and her family do while camping?", + "evidence": [ + "D4:8" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What kind of counseling and mental health services is Melanie 
interested in pursuing?", + "evidence": [ + "D4:13" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What kind of counseling workshop did Melanie attend recently?", + "evidence": [ + "D4:13" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What motivated Melanie to pursue counseling?", + "evidence": [ + "D4:15" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What kind of place does Melanie want to create for people?", + "evidence": [ + "D4:15" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "Did Caroline make the black and white bowl in the photo?", + "expectedAnswer": "No", + "evidence": [ + "D5:8" + ], + "category": 5, + "conversationId": "locomo_1", + 
"generatedAnswer": "No, Caroline did not make the black and white bowl in the photo. Caroline asked Melanie if she made the bowl, indicating that Caroline was not the creator.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer clearly states that Caroline did not make the black and white bowl, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "What are the new shoes that Caroline got used for?", + "evidence": [ + "D7:19" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What is Caroline's reason for getting into running?", + "evidence": [ + "D7:21" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What does Caroline say running has been great for?", + "evidence": [ + "D7:24" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What did Melanie see at the council meeting for adoption?", + "evidence": [ + "D8:9" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read 
properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What inspired Melanie's painting for the art show?", + "evidence": [ + "D9:16" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What inspired Caroline's sculpture for the art show?", + "evidence": [ + "D9:16" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "How often does Caroline go to the beach with her kids?", + "evidence": [ + "D10:10" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What did Caroline and her family see during their camping trip last year?", + "evidence": [ + "D10:14" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": 
"ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "How did Caroline feel while watching the meteor shower?", + "evidence": [ + "D10:18" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "Why did Caroline choose to use colors and patterns in her pottery project?", + "evidence": [ + "D12:6" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "Is Oscar Melanie's pet?", + "expectedAnswer": "No", + "evidence": [ + "D13:3" + ], + "category": 5, + "conversationId": "locomo_1", + "generatedAnswer": "No, Oscar is not Melanie's pet. 
Oscar is Caroline's guinea pig.", + "evaluationResult": "CORRECT", + "evaluationReasoning": "The generated answer clearly states that Oscar is not Melanie's pet and provides additional context, matching the gold answer.\n\n{\"label\": \"CORRECT\"}", + "matchRatio": 0, + "evaluationMethod": "llm" + }, + { + "question": "Where did Oscar hide his bone once?", + "evidence": [ + "D13:6" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What activity did Melanie used to do with her dad?", + "evidence": [ + "D13:7" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What did Melanie make for a local church?", + "evidence": [ + "D14:17" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What did Melanie find in her neighborhood during her walk?", + "evidence": [ + "D14:23" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": 
"Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "Which song motivates Melanie to be courageous?", + "evidence": [ + "D15:23" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What type of instrument does Caroline play?", + "evidence": [ + "D15:26" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "Which classical musicians does Caroline enjoy listening to?", + "evidence": [ + "D15:28" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "Who is Caroline a fan of in terms of modern music?", + "evidence": [ + "D15:28" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": 
"What precautionary sign did Caroline see at the café?", + "evidence": [ + "D16:16" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What setback did Caroline face recently?", + "evidence": [ + "D17:8" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What does Caroline do to keep herself busy during her pottery break?", + "evidence": [ + "D17:10" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What was the poetry reading that Melanie attended about?", + "evidence": [ + "D17:18" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What happened to Caroline's son on their road trip?", + "evidence": [ + "D18:1" + ], + "category": 5, + "conversationId": "locomo_1", 
+ "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "How did Caroline's son handle the accident?", + "evidence": [ + "D18:6", + "D18:7" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "How did Caroline feel about her family after the accident?", + "evidence": [ + "D18:5" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "How did Caroline's children handle the accident?", + "evidence": [ + "D18:7" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "How did Caroline feel after the accident?", + "evidence": [ + "D18:5" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + 
"evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What was Caroline's reaction to her children enjoying the Grand Canyon?", + "evidence": [ + "D18:5" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What did Caroline do after the road trip to relax?", + "evidence": [ + "D18:17" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" + }, + { + "question": "What does Caroline love most about camping with her family?", + "evidence": [ + "D18:21" + ], + "category": 5, + "conversationId": "locomo_1", + "error": "Cannot read properties of undefined (reading 'toString')", + "generatedAnswer": "Error: Evaluation failed", + "evaluationResult": "ERROR", + "evaluationReasoning": "Evaluation failed: Cannot read properties of undefined (reading 'toString')", + "matchRatio": 0, + "evaluationMethod": "error" } ], "totalQuestions": 199 diff --git a/benchmarks/ingest_conversations.js b/benchmarks/ingest_conversations.js index d713aa7..1a73850 100755 --- a/benchmarks/ingest_conversations.js +++ b/benchmarks/ingest_conversations.js @@ -11,10 +11,10 @@ const axios = require("axios"); */ class LocomoIngester { - constructor(baseUrl = process.env.BASE_URL) { - this.baseUrl = baseUrl; + constructor() { + this.baseUrl = 
"http://3.95.55.23:3033"; this.headers = { - Authorization: `Bearer ${process.env.API_KEY}`, + Authorization: `Bearer rc_pat_kbc76ykt3gd81r6ctyeh8as5jryihbeqqvnsi2wt`, }; this.statusFile = path.join(__dirname, "ingestion_status.json"); @@ -73,6 +73,7 @@ class LocomoIngester { return false; } + console.log(`BASE URL: ${this.baseUrl} ${process.env.BASE_URL}`); console.log(`Ingesting conversation ${conversationId}...`); const episodes = this.formatConversationForIngestion(conversation, conversationId); @@ -131,7 +132,12 @@ class LocomoIngester { // Process each session Object.keys(conv).forEach((key) => { - if (key.startsWith("session_") && !key.endsWith("_date_time")) { + if ( + key.startsWith("session_") && + !key.endsWith("_date_time") + // ["session_1"].includes(key) + ) { + console.log(`Processing session ${key}`); const sessionNumber = key.replace("session_", ""); const sessionData = conv[key]; const sessionDateTime = conv[`session_${sessionNumber}_date_time`]; @@ -139,7 +145,7 @@ class LocomoIngester { if (Array.isArray(sessionData)) { sessionData.forEach((dialog, dialogIndex) => { episodes.push({ - content: `${dialog.speaker}: ${dialog.blip_caption ? `Shared ${dialog.blip_caption}.` : ""} ${dialog.text}`, + content: `${dialog.speaker}: ${dialog.blip_caption ? `Shared ${dialog.blip_caption}. 
${dialog.query}.` : ""} ${dialog.text}`, metadata: { conversationId, sessionNumber: parseInt(sessionNumber), @@ -215,7 +221,7 @@ class LocomoIngester { // Ingest each conversation for (let i = 0; i < conversations.length; i++) { - if (i === 0) { + if (i === 4) { const conversation = conversations[i]; const conversationId = `locomo_${i + 1}`; diff --git a/benchmarks/ingestion_status.json b/benchmarks/ingestion_status.json index bcc993e..84d8034 100644 --- a/benchmarks/ingestion_status.json +++ b/benchmarks/ingestion_status.json @@ -2,11 +2,39 @@ "conversations": { "locomo_1": { "ingested": true, - "timestamp": "2025-08-12T05:31:39.437Z", + "timestamp": "2025-08-22T14:47:13.572Z", "totalEpisodes": 419, "successCount": 419, "errorCount": 0 + }, + "locomo_2": { + "ingested": true, + "timestamp": "2025-08-22T14:50:26.625Z", + "totalEpisodes": 369, + "successCount": 369, + "errorCount": 0 + }, + "locomo_3": { + "ingested": true, + "timestamp": "2025-08-22T14:55:30.262Z", + "totalEpisodes": 663, + "successCount": 663, + "errorCount": 0 + }, + "locomo_4": { + "ingested": true, + "timestamp": "2025-08-22T15:00:02.753Z", + "totalEpisodes": 629, + "successCount": 629, + "errorCount": 0 + }, + "locomo_5": { + "ingested": true, + "timestamp": "2025-08-22T15:04:17.340Z", + "totalEpisodes": 680, + "successCount": 680, + "errorCount": 0 } }, - "timestamp": "2025-08-12T05:31:39.441Z" + "timestamp": "2025-08-22T15:04:17.340Z" } \ No newline at end of file diff --git a/integrations/github/pnpm-lock.yaml b/integrations/github/pnpm-lock.yaml index 17f1db0..cda6f35 100644 --- a/integrations/github/pnpm-lock.yaml +++ b/integrations/github/pnpm-lock.yaml @@ -9,8 +9,8 @@ importers: .: dependencies: '@redplanethq/sdk': - specifier: 0.1.1 - version: 0.1.1 + specifier: 0.1.2 + version: 0.1.2 axios: specifier: ^1.7.9 version: 1.11.0 @@ -466,8 +466,8 @@ packages: resolution: {integrity: sha512-QNqXyfVS2wm9hweSYD2O7F0G06uurj9kZ96TRQE5Y9hU7+tgdZwIkbAKc5Ocy1HxEY2kuDQa6cQ1WRs/O5LFKA==} engines: 
{node: ^12.20.0 || ^14.18.0 || >=16.0.0} - '@redplanethq/sdk@0.1.1': - resolution: {integrity: sha512-tfR1c9p7vNeCL5jsF9QlEZcRFLsihaHe/ZQWVKZYXzAZ6GugoIFBaayGfVvjNjyEnN3nlrl3usKBX+hhaKzg0g==} + '@redplanethq/sdk@0.1.2': + resolution: {integrity: sha512-Si+ae2OV0UNy7yvLECjJ1Y/6HHhRO0yU9svIFvBzMlaR/57SDP+KJZLyfWx/PJX+hOuWipyoYTLA9aYULt6z2w==} engines: {node: '>=18.0.0'} '@rollup/rollup-android-arm-eabi@4.46.2': @@ -2455,7 +2455,7 @@ snapshots: '@pkgr/core@0.2.9': {} - '@redplanethq/sdk@0.1.1': + '@redplanethq/sdk@0.1.2': dependencies: commander: 14.0.0 diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 03c945b..1c07514 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -496,6 +496,9 @@ importers: cmdk: specifier: ^0.2.1 version: 0.2.1(@types/react@18.2.69)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + cohere-ai: + specifier: ^7.18.1 + version: 7.18.1(encoding@0.1.13) compression: specifier: ^1.7.4 version: 1.8.0 @@ -974,6 +977,14 @@ packages: '@aws-crypto/util@5.2.0': resolution: {integrity: sha512-4RkU9EsI6ZpBve5fseQlGNUWKMa1RLPQ1dnjnQoe07ldfIzcsGb5hC5W0Dm7u423KWzawlrpbjXBrXCEv9zazQ==} + '@aws-sdk/client-cognito-identity@3.864.0': + resolution: {integrity: sha512-IH3RSg/Zy2+yXQ2d4jmMk2U8A+BuJ9uNUYPWAg144yUUxanN1Czb+GyFKeJO4NGhVnn5D+j1YoRLpJN8PW2B0g==} + engines: {node: '>=18.0.0'} + + '@aws-sdk/client-sagemaker@3.870.0': + resolution: {integrity: sha512-JFJnUmUj7oy+IQ6BPCkELkygKmxhK8Ng/CiuemAVbLV23T0qu455NkphmftBZcQyf819g6F17NNpWKhpvufWdw==} + engines: {node: '>=18.0.0'} + '@aws-sdk/client-ses@3.828.0': resolution: {integrity: sha512-j2w9HugNIJUA202o9YKGuN84GY+swNIjHch6vSrpARYISkzUr+zIYp7u/ygmUjeBtsnjM6RsmGU7miGs9Xt7bA==} engines: {node: '>=18.0.0'} @@ -982,74 +993,154 @@ packages: resolution: {integrity: sha512-qxw8JcPTaFaBwTBUr4YmLajaMh3En65SuBWAKEtjctbITRRekzR7tvr/TkwoyVOh+XoAtkwOn+BQeQbX+/wgHw==} engines: {node: '>=18.0.0'} + '@aws-sdk/client-sso@3.864.0': + resolution: {integrity: 
sha512-THiOp0OpQROEKZ6IdDCDNNh3qnNn/kFFaTSOiugDpgcE5QdsOxh1/RXq7LmHpTJum3cmnFf8jG59PHcz9Tjnlw==} + engines: {node: '>=18.0.0'} + '@aws-sdk/core@3.826.0': resolution: {integrity: sha512-BGbQYzWj3ps+dblq33FY5tz/SsgJCcXX0zjQlSC07tYvU1jHTUvsefphyig+fY38xZ4wdKjbTop+KUmXUYrOXw==} engines: {node: '>=18.0.0'} + '@aws-sdk/core@3.864.0': + resolution: {integrity: sha512-LFUREbobleHEln+Zf7IG83lAZwvHZG0stI7UU0CtwyuhQy5Yx0rKksHNOCmlM7MpTEbSCfntEhYi3jUaY5e5lg==} + engines: {node: '>=18.0.0'} + + '@aws-sdk/credential-provider-cognito-identity@3.864.0': + resolution: {integrity: sha512-jF6xJS67nPvJ/ElvdA2Q/EDArTcd0fKS3R6zImupOkTMm9PwmEM/BM7hpQCUFkVcaUhtvPpYCtuolGq9ezuKng==} + engines: {node: '>=18.0.0'} + '@aws-sdk/credential-provider-env@3.826.0': resolution: {integrity: sha512-DK3pQY8+iKK3MGDdC3uOZQ2psU01obaKlTYhEwNu4VWzgwQL4Vi3sWj4xSWGEK41vqZxiRLq6fOq7ysRI+qEZA==} engines: {node: '>=18.0.0'} + '@aws-sdk/credential-provider-env@3.864.0': + resolution: {integrity: sha512-StJPOI2Rt8UE6lYjXUpg6tqSZaM72xg46ljPg8kIevtBAAfdtq9K20qT/kSliWGIBocMFAv0g2mC0hAa+ECyvg==} + engines: {node: '>=18.0.0'} + '@aws-sdk/credential-provider-http@3.826.0': resolution: {integrity: sha512-N+IVZBh+yx/9GbMZTKO/gErBi/FYZQtcFRItoLbY+6WU+0cSWyZYfkoeOxHmQV3iX9k65oljERIWUmL9x6OSQg==} engines: {node: '>=18.0.0'} + '@aws-sdk/credential-provider-http@3.864.0': + resolution: {integrity: sha512-E/RFVxGTuGnuD+9pFPH2j4l6HvrXzPhmpL8H8nOoJUosjx7d4v93GJMbbl1v/fkDLqW9qN4Jx2cI6PAjohA6OA==} + engines: {node: '>=18.0.0'} + '@aws-sdk/credential-provider-ini@3.828.0': resolution: {integrity: sha512-T3DJMo2/j7gCPpFg2+xEHWgua05t8WP89ye7PaZxA2Fc6CgScHkZsJZTri1QQIU2h+eOZ75EZWkeFLIPgN0kRQ==} engines: {node: '>=18.0.0'} + '@aws-sdk/credential-provider-ini@3.864.0': + resolution: {integrity: sha512-PlxrijguR1gxyPd5EYam6OfWLarj2MJGf07DvCx9MAuQkw77HBnsu6+XbV8fQriFuoJVTBLn9ROhMr/ROAYfUg==} + engines: {node: '>=18.0.0'} + '@aws-sdk/credential-provider-node@3.828.0': resolution: {integrity: 
sha512-9z3iPwVYOQYNzVZj8qycZaS/BOSKRXWA+QVNQlfEnQ4sA4sOcKR4kmV2h+rJcuBsSFfmOF62ZDxyIBGvvM4t/w==} engines: {node: '>=18.0.0'} + '@aws-sdk/credential-provider-node@3.864.0': + resolution: {integrity: sha512-2BEymFeXURS+4jE9tP3vahPwbYRl0/1MVaFZcijj6pq+nf5EPGvkFillbdBRdc98ZI2NedZgSKu3gfZXgYdUhQ==} + engines: {node: '>=18.0.0'} + '@aws-sdk/credential-provider-process@3.826.0': resolution: {integrity: sha512-kURrc4amu3NLtw1yZw7EoLNEVhmOMRUTs+chaNcmS+ERm3yK0nKjaJzmKahmwlTQTSl3wJ8jjK7x962VPo+zWw==} engines: {node: '>=18.0.0'} + '@aws-sdk/credential-provider-process@3.864.0': + resolution: {integrity: sha512-Zxnn1hxhq7EOqXhVYgkF4rI9MnaO3+6bSg/tErnBQ3F8kDpA7CFU24G1YxwaJXp2X4aX3LwthefmSJHwcVP/2g==} + engines: {node: '>=18.0.0'} + '@aws-sdk/credential-provider-sso@3.828.0': resolution: {integrity: sha512-9CEAXzUDSzOjOCb3XfM15TZhTaM+l07kumZyx2z8NC6T2U4qbCJqn4h8mFlRvYrs6cBj2SN40sD3r5Wp0Cq2Kw==} engines: {node: '>=18.0.0'} + '@aws-sdk/credential-provider-sso@3.864.0': + resolution: {integrity: sha512-UPyPNQbxDwHVGmgWdGg9/9yvzuedRQVF5jtMkmP565YX9pKZ8wYAcXhcYdNPWFvH0GYdB0crKOmvib+bmCuwkw==} + engines: {node: '>=18.0.0'} + '@aws-sdk/credential-provider-web-identity@3.828.0': resolution: {integrity: sha512-MguDhGHlQBeK9CQ/P4NOY0whAJ4HJU4x+f1dphg3I1sGlccFqfB8Moor2vXNKu0Th2kvAwkn9pr7gGb/+NGR9g==} engines: {node: '>=18.0.0'} + '@aws-sdk/credential-provider-web-identity@3.864.0': + resolution: {integrity: sha512-nNcjPN4SYg8drLwqK0vgVeSvxeGQiD0FxOaT38mV2H8cu0C5NzpvA+14Xy+W6vT84dxgmJYKk71Cr5QL2Oz+rA==} + engines: {node: '>=18.0.0'} + + '@aws-sdk/credential-providers@3.864.0': + resolution: {integrity: sha512-k4K7PzvHpdHQLczgWT26Yk6t+VBwZ35jkIQ3dKODvBjfzlYHTX0y+VgemmDWrat1ahKfYb/OAw/gdwmnyxsAsw==} + engines: {node: '>=18.0.0'} + '@aws-sdk/middleware-host-header@3.821.0': resolution: {integrity: sha512-xSMR+sopSeWGx5/4pAGhhfMvGBHioVBbqGvDs6pG64xfNwM5vq5s5v6D04e2i+uSTj4qGa71dLUs5I0UzAK3sw==} engines: {node: '>=18.0.0'} + '@aws-sdk/middleware-host-header@3.862.0': + resolution: {integrity: 
sha512-jDje8dCFeFHfuCAxMDXBs8hy8q9NCTlyK4ThyyfAj3U4Pixly2mmzY2u7b7AyGhWsjJNx8uhTjlYq5zkQPQCYw==} + engines: {node: '>=18.0.0'} + '@aws-sdk/middleware-logger@3.821.0': resolution: {integrity: sha512-0cvI0ipf2tGx7fXYEEN5fBeZDz2RnHyb9xftSgUsEq7NBxjV0yTZfLJw6Za5rjE6snC80dRN8+bTNR1tuG89zA==} engines: {node: '>=18.0.0'} + '@aws-sdk/middleware-logger@3.862.0': + resolution: {integrity: sha512-N/bXSJznNBR/i7Ofmf9+gM6dx/SPBK09ZWLKsW5iQjqKxAKn/2DozlnE54uiEs1saHZWoNDRg69Ww4XYYSlG1Q==} + engines: {node: '>=18.0.0'} + '@aws-sdk/middleware-recursion-detection@3.821.0': resolution: {integrity: sha512-efmaifbhBoqKG3bAoEfDdcM8hn1psF+4qa7ykWuYmfmah59JBeqHLfz5W9m9JoTwoKPkFcVLWZxnyZzAnVBOIg==} engines: {node: '>=18.0.0'} + '@aws-sdk/middleware-recursion-detection@3.862.0': + resolution: {integrity: sha512-KVoo3IOzEkTq97YKM4uxZcYFSNnMkhW/qj22csofLegZi5fk90ztUnnaeKfaEJHfHp/tm1Y3uSoOXH45s++kKQ==} + engines: {node: '>=18.0.0'} + '@aws-sdk/middleware-user-agent@3.828.0': resolution: {integrity: sha512-nixvI/SETXRdmrVab4D9LvXT3lrXkwAWGWk2GVvQvzlqN1/M/RfClj+o37Sn4FqRkGH9o9g7Fqb1YqZ4mqDAtA==} engines: {node: '>=18.0.0'} + '@aws-sdk/middleware-user-agent@3.864.0': + resolution: {integrity: sha512-wrddonw4EyLNSNBrApzEhpSrDwJiNfjxDm5E+bn8n32BbAojXASH8W8jNpxz/jMgNkkJNxCfyqybGKzBX0OhbQ==} + engines: {node: '>=18.0.0'} + '@aws-sdk/nested-clients@3.828.0': resolution: {integrity: sha512-xmeOILiR9LvfC8MctgeRXXN8nQTwbOvO4wHvgE8tDRsjnBpyyO0j50R4+viHXdMUGtgGkHEXRv8fFNBq54RgnA==} engines: {node: '>=18.0.0'} + '@aws-sdk/nested-clients@3.864.0': + resolution: {integrity: sha512-H1C+NjSmz2y8Tbgh7Yy89J20yD/hVyk15hNoZDbCYkXg0M358KS7KVIEYs8E2aPOCr1sK3HBE819D/yvdMgokA==} + engines: {node: '>=18.0.0'} + '@aws-sdk/region-config-resolver@3.821.0': resolution: {integrity: sha512-t8og+lRCIIy5nlId0bScNpCkif8sc0LhmtaKsbm0ZPm3sCa/WhCbSZibjbZ28FNjVCV+p0D9RYZx0VDDbtWyjw==} engines: {node: '>=18.0.0'} + '@aws-sdk/region-config-resolver@3.862.0': + resolution: {integrity: 
sha512-VisR+/HuVFICrBPY+q9novEiE4b3mvDofWqyvmxHcWM7HumTz9ZQSuEtnlB/92GVM3KDUrR9EmBHNRrfXYZkcQ==} + engines: {node: '>=18.0.0'} + '@aws-sdk/token-providers@3.828.0': resolution: {integrity: sha512-JdOjI/TxkfQpY/bWbdGMdCiePESXTbtl6MfnJxz35zZ3tfHvBnxAWCoYJirdmjzY/j/dFo5oEyS6mQuXAG9w2w==} engines: {node: '>=18.0.0'} + '@aws-sdk/token-providers@3.864.0': + resolution: {integrity: sha512-gTc2QHOBo05SCwVA65dUtnJC6QERvFaPiuppGDSxoF7O5AQNK0UR/kMSenwLqN8b5E1oLYvQTv3C1idJLRX0cg==} + engines: {node: '>=18.0.0'} + '@aws-sdk/types@3.821.0': resolution: {integrity: sha512-Znroqdai1a90TlxGaJ+FK1lwC0fHpo97Xjsp5UKGR5JODYm7f9+/fF17ebO1KdoBr/Rm0UIFiF5VmI8ts9F1eA==} engines: {node: '>=18.0.0'} + '@aws-sdk/types@3.862.0': + resolution: {integrity: sha512-Bei+RL0cDxxV+lW2UezLbCYYNeJm6Nzee0TpW0FfyTRBhH9C1XQh4+x+IClriXvgBnRquTMMYsmJfvx8iyLKrg==} + engines: {node: '>=18.0.0'} + '@aws-sdk/util-endpoints@3.828.0': resolution: {integrity: sha512-RvKch111SblqdkPzg3oCIdlGxlQs+k+P7Etory9FmxPHyPDvsP1j1c74PmgYqtzzMWmoXTjd+c9naUHh9xG8xg==} engines: {node: '>=18.0.0'} + '@aws-sdk/util-endpoints@3.862.0': + resolution: {integrity: sha512-eCZuScdE9MWWkHGM2BJxm726MCmWk/dlHjOKvkM0sN1zxBellBMw5JohNss1Z8/TUmnW2gb9XHTOiHuGjOdksA==} + engines: {node: '>=18.0.0'} + '@aws-sdk/util-locate-window@3.804.0': resolution: {integrity: sha512-zVoRfpmBVPodYlnMjgVjfGoEZagyRF5IPn3Uo6ZvOZp24chnW/FRstH7ESDHDDRga4z3V+ElUQHKpFDXWyBW5A==} engines: {node: '>=18.0.0'} @@ -1057,6 +1148,9 @@ packages: '@aws-sdk/util-user-agent-browser@3.821.0': resolution: {integrity: sha512-irWZHyM0Jr1xhC+38OuZ7JB6OXMLPZlj48thElpsO1ZSLRkLZx5+I7VV6k3sp2yZ7BYbKz/G2ojSv4wdm7XTLw==} + '@aws-sdk/util-user-agent-browser@3.862.0': + resolution: {integrity: sha512-BmPTlm0r9/10MMr5ND9E92r8KMZbq5ltYXYpVcUbAsnB1RJ8ASJuRoLne5F7mB3YMx0FJoOTuSq7LdQM3LgW3Q==} + '@aws-sdk/util-user-agent-node@3.828.0': resolution: {integrity: sha512-LdN6fTBzTlQmc8O8f1wiZN0qF3yBWVGis7NwpWK7FUEzP9bEZRxYfIkV9oV9zpt6iNRze1SedK3JQVB/udxBoA==} engines: {node: '>=18.0.0'} @@ -1066,10 
+1160,23 @@ packages: aws-crt: optional: true + '@aws-sdk/util-user-agent-node@3.864.0': + resolution: {integrity: sha512-d+FjUm2eJEpP+FRpVR3z6KzMdx1qwxEYDz8jzNKwxYLBBquaBaP/wfoMtMQKAcbrR7aT9FZVZF7zDgzNxUvQlQ==} + engines: {node: '>=18.0.0'} + peerDependencies: + aws-crt: '>=1.0.0' + peerDependenciesMeta: + aws-crt: + optional: true + '@aws-sdk/xml-builder@3.821.0': resolution: {integrity: sha512-DIIotRnefVL6DiaHtO6/21DhJ4JZnnIwdNbpwiAhdt/AVbttcE4yw925gsjur0OGv5BTYXQXU3YnANBYnZjuQA==} engines: {node: '>=18.0.0'} + '@aws-sdk/xml-builder@3.862.0': + resolution: {integrity: sha512-6Ed0kmC1NMbuFTEgNmamAUU1h5gShgxL1hBVLbEzUa3trX5aJBz1vU4bXaBTvOYUAnOHtiy1Ml4AMStd6hJnFA==} + engines: {node: '>=18.0.0'} + '@babel/code-frame@7.27.1': resolution: {integrity: sha512-cjQ7ZlQ0Mv3b47hABuTevyTuYN4i+loJKGeV9flcCgIK37cCXRh+L1bd3iBHlynerhQ7BhCkn2BPbQUL+rGqFg==} engines: {node: '>=6.9.0'} @@ -4199,30 +4306,58 @@ packages: resolution: {integrity: sha512-gJnEjZMvigPDQWHrW3oPrFhQtkrgqBkyjj3pCIdF3A5M6vsZODG93KNlfJprv6bp4245bdT32fsHK4kkH3KYDA==} engines: {node: '>=18.0.0'} + '@smithy/abort-controller@4.0.5': + resolution: {integrity: sha512-jcrqdTQurIrBbUm4W2YdLVMQDoL0sA9DTxYd2s+R/y+2U9NLOP7Xf/YqfSg1FZhlZIYEnvk2mwbyvIfdLEPo8g==} + engines: {node: '>=18.0.0'} + '@smithy/config-resolver@4.1.4': resolution: {integrity: sha512-prmU+rDddxHOH0oNcwemL+SwnzcG65sBF2yXRO7aeXIn/xTlq2pX7JLVbkBnVLowHLg4/OL4+jBmv9hVrVGS+w==} engines: {node: '>=18.0.0'} + '@smithy/config-resolver@4.1.5': + resolution: {integrity: sha512-viuHMxBAqydkB0AfWwHIdwf/PRH2z5KHGUzqyRtS/Wv+n3IHI993Sk76VCA7dD/+GzgGOmlJDITfPcJC1nIVIw==} + engines: {node: '>=18.0.0'} + '@smithy/core@3.5.3': resolution: {integrity: sha512-xa5byV9fEguZNofCclv6v9ra0FYh5FATQW/da7FQUVTic94DfrN/NvmKZjrMyzbpqfot9ZjBaO8U1UeTbmSLuA==} engines: {node: '>=18.0.0'} + '@smithy/core@3.8.0': + resolution: {integrity: sha512-EYqsIYJmkR1VhVE9pccnk353xhs+lB6btdutJEtsp7R055haMJp2yE16eSxw8fv+G0WUY6vqxyYOP8kOqawxYQ==} + engines: {node: '>=18.0.0'} + 
'@smithy/credential-provider-imds@4.0.6': resolution: {integrity: sha512-hKMWcANhUiNbCJouYkZ9V3+/Qf9pteR1dnwgdyzR09R4ODEYx8BbUysHwRSyex4rZ9zapddZhLFTnT4ZijR4pw==} engines: {node: '>=18.0.0'} + '@smithy/credential-provider-imds@4.0.7': + resolution: {integrity: sha512-dDzrMXA8d8riFNiPvytxn0mNwR4B3h8lgrQ5UjAGu6T9z/kRg/Xncf4tEQHE/+t25sY8IH3CowcmWi+1U5B1Gw==} + engines: {node: '>=18.0.0'} + '@smithy/fetch-http-handler@5.0.4': resolution: {integrity: sha512-AMtBR5pHppYMVD7z7G+OlHHAcgAN7v0kVKEpHuTO4Gb199Gowh0taYi9oDStFeUhetkeP55JLSVlTW1n9rFtUw==} engines: {node: '>=18.0.0'} + '@smithy/fetch-http-handler@5.1.1': + resolution: {integrity: sha512-61WjM0PWmZJR+SnmzaKI7t7G0UkkNFboDpzIdzSoy7TByUzlxo18Qlh9s71qug4AY4hlH/CwXdubMtkcNEb/sQ==} + engines: {node: '>=18.0.0'} + '@smithy/hash-node@4.0.4': resolution: {integrity: sha512-qnbTPUhCVnCgBp4z4BUJUhOEkVwxiEi1cyFM+Zj6o+aY8OFGxUQleKWq8ltgp3dujuhXojIvJWdoqpm6dVO3lQ==} engines: {node: '>=18.0.0'} + '@smithy/hash-node@4.0.5': + resolution: {integrity: sha512-cv1HHkKhpyRb6ahD8Vcfb2Hgz67vNIXEp2vnhzfxLFGRukLCNEA5QdsorbUEzXma1Rco0u3rx5VTqbM06GcZqQ==} + engines: {node: '>=18.0.0'} + '@smithy/invalid-dependency@4.0.4': resolution: {integrity: sha512-bNYMi7WKTJHu0gn26wg8OscncTt1t2b8KcsZxvOv56XA6cyXtOAAAaNP7+m45xfppXfOatXF3Sb1MNsLUgVLTw==} engines: {node: '>=18.0.0'} + '@smithy/invalid-dependency@4.0.5': + resolution: {integrity: sha512-IVnb78Qtf7EJpoEVo7qJ8BEXQwgC4n3igeJNNKEj/MLYtapnx8A67Zt/J3RXAj2xSO1910zk0LdFiygSemuLow==} + engines: {node: '>=18.0.0'} + '@smithy/is-array-buffer@2.2.0': resolution: {integrity: sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA==} engines: {node: '>=14.0.0'} @@ -4235,58 +4370,118 @@ packages: resolution: {integrity: sha512-F7gDyfI2BB1Kc+4M6rpuOLne5LOcEknH1n6UQB69qv+HucXBR1rkzXBnQTB2q46sFy1PM/zuSJOB532yc8bg3w==} engines: {node: '>=18.0.0'} + '@smithy/middleware-content-length@4.0.5': + resolution: {integrity: 
sha512-l1jlNZoYzoCC7p0zCtBDE5OBXZ95yMKlRlftooE5jPWQn4YBPLgsp+oeHp7iMHaTGoUdFqmHOPa8c9G3gBsRpQ==} + engines: {node: '>=18.0.0'} + '@smithy/middleware-endpoint@4.1.11': resolution: {integrity: sha512-zDogwtRLzKl58lVS8wPcARevFZNBOOqnmzWWxVe9XiaXU2CADFjvJ9XfNibgkOWs08sxLuSr81NrpY4mgp9OwQ==} engines: {node: '>=18.0.0'} + '@smithy/middleware-endpoint@4.1.18': + resolution: {integrity: sha512-ZhvqcVRPZxnZlokcPaTwb+r+h4yOIOCJmx0v2d1bpVlmP465g3qpVSf7wxcq5zZdu4jb0H4yIMxuPwDJSQc3MQ==} + engines: {node: '>=18.0.0'} + '@smithy/middleware-retry@4.1.12': resolution: {integrity: sha512-wvIH70c4e91NtRxdaLZF+mbLZ/HcC6yg7ySKUiufL6ESp6zJUSnJucZ309AvG9nqCFHSRB5I6T3Ez1Q9wCh0Ww==} engines: {node: '>=18.0.0'} + '@smithy/middleware-retry@4.1.19': + resolution: {integrity: sha512-X58zx/NVECjeuUB6A8HBu4bhx72EoUz+T5jTMIyeNKx2lf+Gs9TmWPNNkH+5QF0COjpInP/xSpJGJ7xEnAklQQ==} + engines: {node: '>=18.0.0'} + '@smithy/middleware-serde@4.0.8': resolution: {integrity: sha512-iSSl7HJoJaGyMIoNn2B7czghOVwJ9nD7TMvLhMWeSB5vt0TnEYyRRqPJu/TqW76WScaNvYYB8nRoiBHR9S1Ddw==} engines: {node: '>=18.0.0'} + '@smithy/middleware-serde@4.0.9': + resolution: {integrity: sha512-uAFFR4dpeoJPGz8x9mhxp+RPjo5wW0QEEIPPPbLXiRRWeCATf/Km3gKIVR5vaP8bN1kgsPhcEeh+IZvUlBv6Xg==} + engines: {node: '>=18.0.0'} + '@smithy/middleware-stack@4.0.4': resolution: {integrity: sha512-kagK5ggDrBUCCzI93ft6DjteNSfY8Ulr83UtySog/h09lTIOAJ/xUSObutanlPT0nhoHAkpmW9V5K8oPyLh+QA==} engines: {node: '>=18.0.0'} + '@smithy/middleware-stack@4.0.5': + resolution: {integrity: sha512-/yoHDXZPh3ocRVyeWQFvC44u8seu3eYzZRveCMfgMOBcNKnAmOvjbL9+Cp5XKSIi9iYA9PECUuW2teDAk8T+OQ==} + engines: {node: '>=18.0.0'} + '@smithy/node-config-provider@4.1.3': resolution: {integrity: sha512-HGHQr2s59qaU1lrVH6MbLlmOBxadtzTsoO4c+bF5asdgVik3I8o7JIOzoeqWc5MjVa+vD36/LWE0iXKpNqooRw==} engines: {node: '>=18.0.0'} + '@smithy/node-config-provider@4.1.4': + resolution: {integrity: sha512-+UDQV/k42jLEPPHSn39l0Bmc4sB1xtdI9Gd47fzo/0PbXzJ7ylgaOByVjF5EeQIumkepnrJyfx86dPa9p47Y+w==} + engines: 
{node: '>=18.0.0'} + '@smithy/node-http-handler@4.0.6': resolution: {integrity: sha512-NqbmSz7AW2rvw4kXhKGrYTiJVDHnMsFnX4i+/FzcZAfbOBauPYs2ekuECkSbtqaxETLLTu9Rl/ex6+I2BKErPA==} engines: {node: '>=18.0.0'} + '@smithy/node-http-handler@4.1.1': + resolution: {integrity: sha512-RHnlHqFpoVdjSPPiYy/t40Zovf3BBHc2oemgD7VsVTFFZrU5erFFe0n52OANZZ/5sbshgD93sOh5r6I35Xmpaw==} + engines: {node: '>=18.0.0'} + '@smithy/property-provider@4.0.4': resolution: {integrity: sha512-qHJ2sSgu4FqF4U/5UUp4DhXNmdTrgmoAai6oQiM+c5RZ/sbDwJ12qxB1M6FnP+Tn/ggkPZf9ccn4jqKSINaquw==} engines: {node: '>=18.0.0'} + '@smithy/property-provider@4.0.5': + resolution: {integrity: sha512-R/bswf59T/n9ZgfgUICAZoWYKBHcsVDurAGX88zsiUtOTA/xUAPyiT+qkNCPwFn43pZqN84M4MiUsbSGQmgFIQ==} + engines: {node: '>=18.0.0'} + '@smithy/protocol-http@5.1.2': resolution: {integrity: sha512-rOG5cNLBXovxIrICSBm95dLqzfvxjEmuZx4KK3hWwPFHGdW3lxY0fZNXfv2zebfRO7sJZ5pKJYHScsqopeIWtQ==} engines: {node: '>=18.0.0'} + '@smithy/protocol-http@5.1.3': + resolution: {integrity: sha512-fCJd2ZR7D22XhDY0l+92pUag/7je2BztPRQ01gU5bMChcyI0rlly7QFibnYHzcxDvccMjlpM/Q1ev8ceRIb48w==} + engines: {node: '>=18.0.0'} + '@smithy/querystring-builder@4.0.4': resolution: {integrity: sha512-SwREZcDnEYoh9tLNgMbpop+UTGq44Hl9tdj3rf+yeLcfH7+J8OXEBaMc2kDxtyRHu8BhSg9ADEx0gFHvpJgU8w==} engines: {node: '>=18.0.0'} + '@smithy/querystring-builder@4.0.5': + resolution: {integrity: sha512-NJeSCU57piZ56c+/wY+AbAw6rxCCAOZLCIniRE7wqvndqxcKKDOXzwWjrY7wGKEISfhL9gBbAaWWgHsUGedk+A==} + engines: {node: '>=18.0.0'} + '@smithy/querystring-parser@4.0.4': resolution: {integrity: sha512-6yZf53i/qB8gRHH/l2ZwUG5xgkPgQF15/KxH0DdXMDHjesA9MeZje/853ifkSY0x4m5S+dfDZ+c4x439PF0M2w==} engines: {node: '>=18.0.0'} + '@smithy/querystring-parser@4.0.5': + resolution: {integrity: sha512-6SV7md2CzNG/WUeTjVe6Dj8noH32r4MnUeFKZrnVYsQxpGSIcphAanQMayi8jJLZAWm6pdM9ZXvKCpWOsIGg0w==} + engines: {node: '>=18.0.0'} + '@smithy/service-error-classification@4.0.5': resolution: {integrity: 
sha512-LvcfhrnCBvCmTee81pRlh1F39yTS/+kYleVeLCwNtkY8wtGg8V/ca9rbZZvYIl8OjlMtL6KIjaiL/lgVqHD2nA==} engines: {node: '>=18.0.0'} + '@smithy/service-error-classification@4.0.7': + resolution: {integrity: sha512-XvRHOipqpwNhEjDf2L5gJowZEm5nsxC16pAZOeEcsygdjv9A2jdOh3YoDQvOXBGTsaJk6mNWtzWalOB9976Wlg==} + engines: {node: '>=18.0.0'} + '@smithy/shared-ini-file-loader@4.0.4': resolution: {integrity: sha512-63X0260LoFBjrHifPDs+nM9tV0VMkOTl4JRMYNuKh/f5PauSjowTfvF3LogfkWdcPoxsA9UjqEOgjeYIbhb7Nw==} engines: {node: '>=18.0.0'} + '@smithy/shared-ini-file-loader@4.0.5': + resolution: {integrity: sha512-YVVwehRDuehgoXdEL4r1tAAzdaDgaC9EQvhK0lEbfnbrd0bd5+CTQumbdPryX3J2shT7ZqQE+jPW4lmNBAB8JQ==} + engines: {node: '>=18.0.0'} + '@smithy/signature-v4@5.1.2': resolution: {integrity: sha512-d3+U/VpX7a60seHziWnVZOHuEgJlclufjkS6zhXvxcJgkJq4UWdH5eOBLzHRMx6gXjsdT9h6lfpmLzbrdupHgQ==} engines: {node: '>=18.0.0'} + '@smithy/signature-v4@5.1.3': + resolution: {integrity: sha512-mARDSXSEgllNzMw6N+mC+r1AQlEBO3meEAkR/UlfAgnMzJUB3goRBWgip1EAMG99wh36MDqzo86SfIX5Y+VEaw==} + engines: {node: '>=18.0.0'} + + '@smithy/smithy-client@4.4.10': + resolution: {integrity: sha512-iW6HjXqN0oPtRS0NK/zzZ4zZeGESIFcxj2FkWed3mcK8jdSdHzvnCKXSjvewESKAgGKAbJRA+OsaqKhkdYRbQQ==} + engines: {node: '>=18.0.0'} + '@smithy/smithy-client@4.4.3': resolution: {integrity: sha512-xxzNYgA0HD6ETCe5QJubsxP0hQH3QK3kbpJz3QrosBCuIWyEXLR/CO5hFb2OeawEKUxMNhz3a1nuJNN2np2RMA==} engines: {node: '>=18.0.0'} @@ -4295,10 +4490,18 @@ packages: resolution: {integrity: sha512-UqKOQBL2x6+HWl3P+3QqFD4ncKq0I8Nuz9QItGv5WuKuMHuuwlhvqcZCoXGfc+P1QmfJE7VieykoYYmrOoFJxA==} engines: {node: '>=18.0.0'} + '@smithy/types@4.3.2': + resolution: {integrity: sha512-QO4zghLxiQ5W9UZmX2Lo0nta2PuE1sSrXUYDoaB6HMR762C0P7v/HEPHf6ZdglTVssJG1bsrSBxdc3quvDSihw==} + engines: {node: '>=18.0.0'} + '@smithy/url-parser@4.0.4': resolution: {integrity: sha512-eMkc144MuN7B0TDA4U2fKs+BqczVbk3W+qIvcoCY6D1JY3hnAdCuhCZODC+GAeaxj0p6Jroz4+XMUn3PCxQQeQ==} engines: {node: '>=18.0.0'} + 
'@smithy/url-parser@4.0.5': + resolution: {integrity: sha512-j+733Um7f1/DXjYhCbvNXABV53NyCRRA54C7bNEIxNPs0YjfRxeMKjjgm2jvTYrciZyCjsicHwQ6Q0ylo+NAUw==} + engines: {node: '>=18.0.0'} + '@smithy/util-base64@4.0.0': resolution: {integrity: sha512-CvHfCmO2mchox9kjrtzoHkWHxjHZzaFojLc8quxXY7WAAMAg43nuxwv95tATVgQFNDwd4M9S1qFzj40Ul41Kmg==} engines: {node: '>=18.0.0'} @@ -4327,14 +4530,26 @@ packages: resolution: {integrity: sha512-mvLMh87xSmQrV5XqnUYEPoiFFeEGYeAKIDDKdhE2ahqitm8OHM3aSvhqL6rrK6wm1brIk90JhxDf5lf2hbrLbQ==} engines: {node: '>=18.0.0'} + '@smithy/util-defaults-mode-browser@4.0.26': + resolution: {integrity: sha512-xgl75aHIS/3rrGp7iTxQAOELYeyiwBu+eEgAk4xfKwJJ0L8VUjhO2shsDpeil54BOFsqmk5xfdesiewbUY5tKQ==} + engines: {node: '>=18.0.0'} + '@smithy/util-defaults-mode-node@4.0.19': resolution: {integrity: sha512-8tYnx+LUfj6m+zkUUIrIQJxPM1xVxfRBvoGHua7R/i6qAxOMjqR6CpEpDwKoIs1o0+hOjGvkKE23CafKL0vJ9w==} engines: {node: '>=18.0.0'} + '@smithy/util-defaults-mode-node@4.0.26': + resolution: {integrity: sha512-z81yyIkGiLLYVDetKTUeCZQ8x20EEzvQjrqJtb/mXnevLq2+w3XCEWTJ2pMp401b6BkEkHVfXb/cROBpVauLMQ==} + engines: {node: '>=18.0.0'} + '@smithy/util-endpoints@3.0.6': resolution: {integrity: sha512-YARl3tFL3WgPuLzljRUnrS2ngLiUtkwhQtj8PAL13XZSyUiNLQxwG3fBBq3QXFqGFUXepIN73pINp3y8c2nBmA==} engines: {node: '>=18.0.0'} + '@smithy/util-endpoints@3.0.7': + resolution: {integrity: sha512-klGBP+RpBp6V5JbrY2C/VKnHXn3d5V2YrifZbmMY8os7M6m8wdYFoO6w/fe5VkP+YVwrEktW3IWYaSQVNZJ8oQ==} + engines: {node: '>=18.0.0'} + '@smithy/util-hex-encoding@4.0.0': resolution: {integrity: sha512-Yk5mLhHtfIgW2W2WQZWSg5kuMZCVbvhFmC7rV4IO2QqnZdbEFPmQnCcGMAX2z/8Qj3B9hYYNjZOhWym+RwhePw==} engines: {node: '>=18.0.0'} @@ -4343,14 +4558,26 @@ packages: resolution: {integrity: sha512-9MLKmkBmf4PRb0ONJikCbCwORACcil6gUWojwARCClT7RmLzF04hUR4WdRprIXal7XVyrddadYNfp2eF3nrvtQ==} engines: {node: '>=18.0.0'} + '@smithy/util-middleware@4.0.5': + resolution: {integrity: 
sha512-N40PfqsZHRSsByGB81HhSo+uvMxEHT+9e255S53pfBw/wI6WKDI7Jw9oyu5tJTLwZzV5DsMha3ji8jk9dsHmQQ==} + engines: {node: '>=18.0.0'} + '@smithy/util-retry@4.0.5': resolution: {integrity: sha512-V7MSjVDTlEt/plmOFBn1762Dyu5uqMrV2Pl2X0dYk4XvWfdWJNe9Bs5Bzb56wkCuiWjSfClVMGcsuKrGj7S/yg==} engines: {node: '>=18.0.0'} + '@smithy/util-retry@4.0.7': + resolution: {integrity: sha512-TTO6rt0ppK70alZpkjwy+3nQlTiqNfoXja+qwuAchIEAIoSZW8Qyd76dvBv3I5bCpE38APafG23Y/u270NspiQ==} + engines: {node: '>=18.0.0'} + '@smithy/util-stream@4.2.2': resolution: {integrity: sha512-aI+GLi7MJoVxg24/3J1ipwLoYzgkB4kUfogZfnslcYlynj3xsQ0e7vk4TnTro9hhsS5PvX1mwmkRqqHQjwcU7w==} engines: {node: '>=18.0.0'} + '@smithy/util-stream@4.2.4': + resolution: {integrity: sha512-vSKnvNZX2BXzl0U2RgCLOwWaAP9x/ddd/XobPK02pCbzRm5s55M53uwb1rl/Ts7RXZvdJZerPkA+en2FDghLuQ==} + engines: {node: '>=18.0.0'} + '@smithy/util-uri-escape@4.0.0': resolution: {integrity: sha512-77yfbCbQMtgtTylO9itEAdpPXSog3ZxMe09AEhm0dU0NLTalV70ghDZFR+Nfi1C60jnJoh/Re4090/DuZh2Omg==} engines: {node: '>=18.0.0'} @@ -4367,6 +4594,10 @@ packages: resolution: {integrity: sha512-4QvC49HTteI1gfemu0I1syWovJgPvGn7CVUoN9ZFkdvr/cCFkrEL7qNCdx/2eICqDWEGnnr68oMdSIPCLAriSQ==} engines: {node: '>=18.0.0'} + '@smithy/util-waiter@4.0.7': + resolution: {integrity: sha512-mYqtQXPmrwvUljaHyGxYUIIRI3qjBTEb/f5QFi3A6VlxhpmZd5mWXn9W+qUkf2pVE1Hv3SqxefiZOPGdxmO64A==} + engines: {node: '>=18.0.0'} + '@socket.io/component-emitter@3.1.2': resolution: {integrity: sha512-9BCxFwvbGg/RsZK9tjXd8s4UcwR0MWeFQ1XEKIQVVvAGJyINdrqKMcTRyLoK8Rse1GjzLV9cwjWV1olXRWEXVA==} @@ -5970,6 +6201,9 @@ packages: react: ^18 || ^19 || ^19.0.0-rc react-dom: ^18 || ^19 || ^19.0.0-rc + cohere-ai@7.18.1: + resolution: {integrity: sha512-kZM8dMAJlNXshl1c0X+jPTkD5P4QSexZ2LHQ6Cu8g2v3j1eGyT6rJKk+zzJikluWJ1bAyobj+ucGIhmQtWfl4A==} + color-convert@1.9.3: resolution: {integrity: sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==} @@ -6073,6 +6307,10 @@ packages: 
convert-source-map@2.0.0: resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==} + convict@6.2.4: + resolution: {integrity: sha512-qN60BAwdMVdofckX7AlohVJ2x9UvjTNoKVXCL2LxFk1l7757EJqf1nySdMkPQer0bt8kQ5lQiyZ9/2NvrFBuwQ==} + engines: {node: '>=6'} + cookie-signature@1.0.6: resolution: {integrity: sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==} @@ -7105,6 +7343,10 @@ packages: resolution: {integrity: sha512-xkjOecfnKGkSsOwtZ5Pz7Us/T6mrbPQrq0nh+aCO5V9nk5NLWmasAHumTKjiPJPWANe+kAZ84Jc8ooJkzZ88Sw==} hasBin: true + fast-xml-parser@5.2.5: + resolution: {integrity: sha512-pfX9uG9Ki0yekDHx2SiuRIyFdyAr1kMIMitPvb0YBo8SUfKvia7w7FIyd/l6av85pFYRhZscS75MwMnbvY+hcQ==} + hasBin: true + fastq@1.19.1: resolution: {integrity: sha512-GwLTyxkCXjXbxqIhTsMI2Nui8huMPtnxg7krajPJAjnEG/iiOS7i+zCtWGZR9G0NBKbXKh6X9m9UIsYX/N6vvQ==} @@ -7184,6 +7426,10 @@ packages: resolution: {integrity: sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==} engines: {node: '>=14'} + form-data-encoder@4.1.0: + resolution: {integrity: sha512-G6NsmEW15s0Uw9XnCg+33H3ViYRyiM0hMrMhhqQOR8NFc5GhYrI+6I3u7OTw7b91J2g8rtvMBZJDbcGb2YUniw==} + engines: {node: '>= 18'} + form-data@4.0.3: resolution: {integrity: sha512-qsITQPfmvMOSAdeyZ+12I1c+CKSstAFAwu+97zrnWAbIr5u8wfsExUzCesVLC8NgHuRUqNN4Zy6UPWUTRGslcA==} engines: {node: '>= 6'} @@ -7192,6 +7438,10 @@ packages: resolution: {integrity: sha512-wzsgA6WOq+09wrU1tsJ09udeR/YZRaeArL9e1wPbFg3GG2yDnC2ldKpxs4xunpFF9DgqCqOIra3bc1HWrJ37Ww==} engines: {node: '>=0.4.x'} + formdata-node@6.0.3: + resolution: {integrity: sha512-8e1++BCiTzUno9v5IZ2J6bv4RU+3UKDmqWUQD0MIMVCd9AdhWkO1gw57oo1mNEX1dMq2EGI+FbWz4B92pscSQg==} + engines: {node: '>= 18'} + forwarded@0.2.0: resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==} engines: {node: '>= 0.6'} @@ -7919,6 +8169,9 @@ packages: 
resolution: {integrity: sha512-34wB/Y7MW7bzjKRjUKTa46I2Z7eV62Rkhva+KkopW7Qvv/OSWBqvkSY7vusOPrNuZcUG3tApvdVgNB8POj3SPw==} engines: {node: '>=10'} + js-base64@3.7.7: + resolution: {integrity: sha512-7rCnleh0z2CkXhH67J8K1Ytz0b2Y+yxTPL+/KOJoa20hfnVQ/3/T6W/KflYI4bRHRagNeXeU2bkNGI3v1oS/lw==} + js-beautify@1.15.4: resolution: {integrity: sha512-9/KXeZUKKJwqCXUdBxFJ3vPh467OCckSBmYDwSK/EtV090K+iMJ7zx2S3HLVDIWFQdqMIsZWbnaGiba18aWhaA==} engines: {node: '>=14'} @@ -8194,6 +8447,9 @@ packages: lodash.castarray@4.4.0: resolution: {integrity: sha512-aVx8ztPv7/2ULbArGJ2Y42bG1mEQ5mGjpdvrbJcJFU3TbYybe+QlLS4pst9zV52ymy2in1KpFPiZnAOATxD4+Q==} + lodash.clonedeep@4.5.0: + resolution: {integrity: sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ==} + lodash.debounce@4.0.8: resolution: {integrity: sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow==} @@ -9622,6 +9878,10 @@ packages: process-nextick-args@2.0.1: resolution: {integrity: sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==} + process@0.11.10: + resolution: {integrity: sha512-cdGef/drWFoydD1JsMzuFf8100nZl+GT+yacc2bEced5f9Rjk4z+WtFUTBu9PhOi9j/jfmBPu0mMEY4wIdAF8A==} + engines: {node: '>= 0.6.0'} + prom-client@15.1.3: resolution: {integrity: sha512-6ZiOBfCywsD4k1BN9IX0uZhF+tJkV8q8llP64G5Hajs4JOeVLPCwpPVcpXy3BwYiUGgyJzsJJQeOIv7+hDSq8g==} engines: {node: ^16 || ^18 || >=20} @@ -9948,6 +10208,10 @@ packages: resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==} engines: {node: '>= 6'} + readable-stream@4.7.0: + resolution: {integrity: sha512-oIGGmcpTLwPga8Bn6/Z75SVaH1z5dUut2ibSyAMVhmUggWpmDn2dapB0n7f8nwaSiRtepAsfJyfXIO5DCVAODg==} + engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} + readdirp@3.6.0: resolution: {integrity: sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA==} engines: {node: 
'>=8.10.0'} @@ -10553,6 +10817,9 @@ packages: strnum@1.1.2: resolution: {integrity: sha512-vrN+B7DBIoTTZjnPNewwhx6cBA/H+IS7rfW68n7XxC1y7uoiGQBxaKzqucGUgavX15dJgiGztLJ8vxuEzwqBdA==} + strnum@2.1.1: + resolution: {integrity: sha512-7ZvoFTiCnGxBtDqJ//Cu6fWtZtc7Y3x+QOirG15wztbdngGSkht27o2pyGWrVy0b4WAy3jbKmnoK6g5VlVNUUw==} + style-to-js@1.1.17: resolution: {integrity: sha512-xQcBGDxJb6jjFCTzvQtfiPn6YvvP2O8U1MDIPNfJQlWMYfktPy+iGsHE7cssjs7y84d9fQaK4UF3RIJaAHSoYA==} @@ -11113,6 +11380,9 @@ packages: uri-js@4.4.1: resolution: {integrity: sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==} + url-join@4.0.1: + resolution: {integrity: sha512-jk1+QP6ZJqyOiuEI9AEWQfju/nB2Pw466kbA0LEZljHwKeMgd9WrAEgEGxjPDD2+TNbbb37rTyhEfrCXfuKXnA==} + use-callback-ref@1.3.3: resolution: {integrity: sha512-jQL3lRnocaFtu3V00JToYz/4QkNWswxijDaCVNZRiRTO3HQDLsdu1ZtmIUvV4yPp+rvWm5j0y0TG/S61cuijTg==} engines: {node: '>=10'} @@ -11487,6 +11757,10 @@ packages: resolution: {integrity: sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==} engines: {node: '>=6'} + yargs-parser@20.2.9: + resolution: {integrity: sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==} + engines: {node: '>=10'} + yargs-parser@21.1.1: resolution: {integrity: sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==} engines: {node: '>=12'} @@ -11641,6 +11915,97 @@ snapshots: '@smithy/util-utf8': 2.3.0 tslib: 2.8.1 + '@aws-sdk/client-cognito-identity@3.864.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/core': 3.864.0 + '@aws-sdk/credential-provider-node': 3.864.0 + '@aws-sdk/middleware-host-header': 3.862.0 + '@aws-sdk/middleware-logger': 3.862.0 + '@aws-sdk/middleware-recursion-detection': 3.862.0 + '@aws-sdk/middleware-user-agent': 3.864.0 + '@aws-sdk/region-config-resolver': 3.862.0 + '@aws-sdk/types': 
3.862.0 + '@aws-sdk/util-endpoints': 3.862.0 + '@aws-sdk/util-user-agent-browser': 3.862.0 + '@aws-sdk/util-user-agent-node': 3.864.0 + '@smithy/config-resolver': 4.1.5 + '@smithy/core': 3.8.0 + '@smithy/fetch-http-handler': 5.1.1 + '@smithy/hash-node': 4.0.5 + '@smithy/invalid-dependency': 4.0.5 + '@smithy/middleware-content-length': 4.0.5 + '@smithy/middleware-endpoint': 4.1.18 + '@smithy/middleware-retry': 4.1.19 + '@smithy/middleware-serde': 4.0.9 + '@smithy/middleware-stack': 4.0.5 + '@smithy/node-config-provider': 4.1.4 + '@smithy/node-http-handler': 4.1.1 + '@smithy/protocol-http': 5.1.3 + '@smithy/smithy-client': 4.4.10 + '@smithy/types': 4.3.2 + '@smithy/url-parser': 4.0.5 + '@smithy/util-base64': 4.0.0 + '@smithy/util-body-length-browser': 4.0.0 + '@smithy/util-body-length-node': 4.0.0 + '@smithy/util-defaults-mode-browser': 4.0.26 + '@smithy/util-defaults-mode-node': 4.0.26 + '@smithy/util-endpoints': 3.0.7 + '@smithy/util-middleware': 4.0.5 + '@smithy/util-retry': 4.0.7 + '@smithy/util-utf8': 4.0.0 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/client-sagemaker@3.870.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/core': 3.864.0 + '@aws-sdk/credential-provider-node': 3.864.0 + '@aws-sdk/middleware-host-header': 3.862.0 + '@aws-sdk/middleware-logger': 3.862.0 + '@aws-sdk/middleware-recursion-detection': 3.862.0 + '@aws-sdk/middleware-user-agent': 3.864.0 + '@aws-sdk/region-config-resolver': 3.862.0 + '@aws-sdk/types': 3.862.0 + '@aws-sdk/util-endpoints': 3.862.0 + '@aws-sdk/util-user-agent-browser': 3.862.0 + '@aws-sdk/util-user-agent-node': 3.864.0 + '@smithy/config-resolver': 4.1.5 + '@smithy/core': 3.8.0 + '@smithy/fetch-http-handler': 5.1.1 + '@smithy/hash-node': 4.0.5 + '@smithy/invalid-dependency': 4.0.5 + '@smithy/middleware-content-length': 4.0.5 + '@smithy/middleware-endpoint': 4.1.18 + '@smithy/middleware-retry': 4.1.19 + '@smithy/middleware-serde': 4.0.9 + 
'@smithy/middleware-stack': 4.0.5 + '@smithy/node-config-provider': 4.1.4 + '@smithy/node-http-handler': 4.1.1 + '@smithy/protocol-http': 5.1.3 + '@smithy/smithy-client': 4.4.10 + '@smithy/types': 4.3.2 + '@smithy/url-parser': 4.0.5 + '@smithy/util-base64': 4.0.0 + '@smithy/util-body-length-browser': 4.0.0 + '@smithy/util-body-length-node': 4.0.0 + '@smithy/util-defaults-mode-browser': 4.0.26 + '@smithy/util-defaults-mode-node': 4.0.26 + '@smithy/util-endpoints': 3.0.7 + '@smithy/util-middleware': 4.0.5 + '@smithy/util-retry': 4.0.7 + '@smithy/util-utf8': 4.0.0 + '@smithy/util-waiter': 4.0.7 + '@types/uuid': 9.0.8 + tslib: 2.8.1 + uuid: 9.0.1 + transitivePeerDependencies: + - aws-crt + '@aws-sdk/client-ses@3.828.0': dependencies: '@aws-crypto/sha256-browser': 5.2.0 @@ -11729,6 +12094,49 @@ snapshots: transitivePeerDependencies: - aws-crt + '@aws-sdk/client-sso@3.864.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/core': 3.864.0 + '@aws-sdk/middleware-host-header': 3.862.0 + '@aws-sdk/middleware-logger': 3.862.0 + '@aws-sdk/middleware-recursion-detection': 3.862.0 + '@aws-sdk/middleware-user-agent': 3.864.0 + '@aws-sdk/region-config-resolver': 3.862.0 + '@aws-sdk/types': 3.862.0 + '@aws-sdk/util-endpoints': 3.862.0 + '@aws-sdk/util-user-agent-browser': 3.862.0 + '@aws-sdk/util-user-agent-node': 3.864.0 + '@smithy/config-resolver': 4.1.5 + '@smithy/core': 3.8.0 + '@smithy/fetch-http-handler': 5.1.1 + '@smithy/hash-node': 4.0.5 + '@smithy/invalid-dependency': 4.0.5 + '@smithy/middleware-content-length': 4.0.5 + '@smithy/middleware-endpoint': 4.1.18 + '@smithy/middleware-retry': 4.1.19 + '@smithy/middleware-serde': 4.0.9 + '@smithy/middleware-stack': 4.0.5 + '@smithy/node-config-provider': 4.1.4 + '@smithy/node-http-handler': 4.1.1 + '@smithy/protocol-http': 5.1.3 + '@smithy/smithy-client': 4.4.10 + '@smithy/types': 4.3.2 + '@smithy/url-parser': 4.0.5 + '@smithy/util-base64': 4.0.0 + 
'@smithy/util-body-length-browser': 4.0.0 + '@smithy/util-body-length-node': 4.0.0 + '@smithy/util-defaults-mode-browser': 4.0.26 + '@smithy/util-defaults-mode-node': 4.0.26 + '@smithy/util-endpoints': 3.0.7 + '@smithy/util-middleware': 4.0.5 + '@smithy/util-retry': 4.0.7 + '@smithy/util-utf8': 4.0.0 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + '@aws-sdk/core@3.826.0': dependencies: '@aws-sdk/types': 3.821.0 @@ -11747,6 +12155,34 @@ snapshots: fast-xml-parser: 4.4.1 tslib: 2.8.1 + '@aws-sdk/core@3.864.0': + dependencies: + '@aws-sdk/types': 3.862.0 + '@aws-sdk/xml-builder': 3.862.0 + '@smithy/core': 3.8.0 + '@smithy/node-config-provider': 4.1.4 + '@smithy/property-provider': 4.0.5 + '@smithy/protocol-http': 5.1.3 + '@smithy/signature-v4': 5.1.3 + '@smithy/smithy-client': 4.4.10 + '@smithy/types': 4.3.2 + '@smithy/util-base64': 4.0.0 + '@smithy/util-body-length-browser': 4.0.0 + '@smithy/util-middleware': 4.0.5 + '@smithy/util-utf8': 4.0.0 + fast-xml-parser: 5.2.5 + tslib: 2.8.1 + + '@aws-sdk/credential-provider-cognito-identity@3.864.0': + dependencies: + '@aws-sdk/client-cognito-identity': 3.864.0 + '@aws-sdk/types': 3.862.0 + '@smithy/property-provider': 4.0.5 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + '@aws-sdk/credential-provider-env@3.826.0': dependencies: '@aws-sdk/core': 3.826.0 @@ -11755,6 +12191,14 @@ snapshots: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@aws-sdk/credential-provider-env@3.864.0': + dependencies: + '@aws-sdk/core': 3.864.0 + '@aws-sdk/types': 3.862.0 + '@smithy/property-provider': 4.0.5 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@aws-sdk/credential-provider-http@3.826.0': dependencies: '@aws-sdk/core': 3.826.0 @@ -11768,6 +12212,19 @@ snapshots: '@smithy/util-stream': 4.2.2 tslib: 2.8.1 + '@aws-sdk/credential-provider-http@3.864.0': + dependencies: + '@aws-sdk/core': 3.864.0 + '@aws-sdk/types': 3.862.0 + '@smithy/fetch-http-handler': 5.1.1 + '@smithy/node-http-handler': 4.1.1 + 
'@smithy/property-provider': 4.0.5 + '@smithy/protocol-http': 5.1.3 + '@smithy/smithy-client': 4.4.10 + '@smithy/types': 4.3.2 + '@smithy/util-stream': 4.2.4 + tslib: 2.8.1 + '@aws-sdk/credential-provider-ini@3.828.0': dependencies: '@aws-sdk/core': 3.826.0 @@ -11786,6 +12243,24 @@ snapshots: transitivePeerDependencies: - aws-crt + '@aws-sdk/credential-provider-ini@3.864.0': + dependencies: + '@aws-sdk/core': 3.864.0 + '@aws-sdk/credential-provider-env': 3.864.0 + '@aws-sdk/credential-provider-http': 3.864.0 + '@aws-sdk/credential-provider-process': 3.864.0 + '@aws-sdk/credential-provider-sso': 3.864.0 + '@aws-sdk/credential-provider-web-identity': 3.864.0 + '@aws-sdk/nested-clients': 3.864.0 + '@aws-sdk/types': 3.862.0 + '@smithy/credential-provider-imds': 4.0.7 + '@smithy/property-provider': 4.0.5 + '@smithy/shared-ini-file-loader': 4.0.5 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + '@aws-sdk/credential-provider-node@3.828.0': dependencies: '@aws-sdk/credential-provider-env': 3.826.0 @@ -11803,6 +12278,23 @@ snapshots: transitivePeerDependencies: - aws-crt + '@aws-sdk/credential-provider-node@3.864.0': + dependencies: + '@aws-sdk/credential-provider-env': 3.864.0 + '@aws-sdk/credential-provider-http': 3.864.0 + '@aws-sdk/credential-provider-ini': 3.864.0 + '@aws-sdk/credential-provider-process': 3.864.0 + '@aws-sdk/credential-provider-sso': 3.864.0 + '@aws-sdk/credential-provider-web-identity': 3.864.0 + '@aws-sdk/types': 3.862.0 + '@smithy/credential-provider-imds': 4.0.7 + '@smithy/property-provider': 4.0.5 + '@smithy/shared-ini-file-loader': 4.0.5 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + '@aws-sdk/credential-provider-process@3.826.0': dependencies: '@aws-sdk/core': 3.826.0 @@ -11812,6 +12304,15 @@ snapshots: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@aws-sdk/credential-provider-process@3.864.0': + dependencies: + '@aws-sdk/core': 3.864.0 + '@aws-sdk/types': 3.862.0 + 
'@smithy/property-provider': 4.0.5 + '@smithy/shared-ini-file-loader': 4.0.5 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@aws-sdk/credential-provider-sso@3.828.0': dependencies: '@aws-sdk/client-sso': 3.828.0 @@ -11825,6 +12326,19 @@ snapshots: transitivePeerDependencies: - aws-crt + '@aws-sdk/credential-provider-sso@3.864.0': + dependencies: + '@aws-sdk/client-sso': 3.864.0 + '@aws-sdk/core': 3.864.0 + '@aws-sdk/token-providers': 3.864.0 + '@aws-sdk/types': 3.862.0 + '@smithy/property-provider': 4.0.5 + '@smithy/shared-ini-file-loader': 4.0.5 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + '@aws-sdk/credential-provider-web-identity@3.828.0': dependencies: '@aws-sdk/core': 3.826.0 @@ -11836,6 +12350,41 @@ snapshots: transitivePeerDependencies: - aws-crt + '@aws-sdk/credential-provider-web-identity@3.864.0': + dependencies: + '@aws-sdk/core': 3.864.0 + '@aws-sdk/nested-clients': 3.864.0 + '@aws-sdk/types': 3.862.0 + '@smithy/property-provider': 4.0.5 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + + '@aws-sdk/credential-providers@3.864.0': + dependencies: + '@aws-sdk/client-cognito-identity': 3.864.0 + '@aws-sdk/core': 3.864.0 + '@aws-sdk/credential-provider-cognito-identity': 3.864.0 + '@aws-sdk/credential-provider-env': 3.864.0 + '@aws-sdk/credential-provider-http': 3.864.0 + '@aws-sdk/credential-provider-ini': 3.864.0 + '@aws-sdk/credential-provider-node': 3.864.0 + '@aws-sdk/credential-provider-process': 3.864.0 + '@aws-sdk/credential-provider-sso': 3.864.0 + '@aws-sdk/credential-provider-web-identity': 3.864.0 + '@aws-sdk/nested-clients': 3.864.0 + '@aws-sdk/types': 3.862.0 + '@smithy/config-resolver': 4.1.5 + '@smithy/core': 3.8.0 + '@smithy/credential-provider-imds': 4.0.7 + '@smithy/node-config-provider': 4.1.4 + '@smithy/property-provider': 4.0.5 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + '@aws-sdk/middleware-host-header@3.821.0': dependencies: 
'@aws-sdk/types': 3.821.0 @@ -11843,12 +12392,25 @@ snapshots: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@aws-sdk/middleware-host-header@3.862.0': + dependencies: + '@aws-sdk/types': 3.862.0 + '@smithy/protocol-http': 5.1.3 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@aws-sdk/middleware-logger@3.821.0': dependencies: '@aws-sdk/types': 3.821.0 '@smithy/types': 4.3.1 tslib: 2.8.1 + '@aws-sdk/middleware-logger@3.862.0': + dependencies: + '@aws-sdk/types': 3.862.0 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@aws-sdk/middleware-recursion-detection@3.821.0': dependencies: '@aws-sdk/types': 3.821.0 @@ -11856,6 +12418,13 @@ snapshots: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@aws-sdk/middleware-recursion-detection@3.862.0': + dependencies: + '@aws-sdk/types': 3.862.0 + '@smithy/protocol-http': 5.1.3 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@aws-sdk/middleware-user-agent@3.828.0': dependencies: '@aws-sdk/core': 3.826.0 @@ -11866,6 +12435,16 @@ snapshots: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@aws-sdk/middleware-user-agent@3.864.0': + dependencies: + '@aws-sdk/core': 3.864.0 + '@aws-sdk/types': 3.862.0 + '@aws-sdk/util-endpoints': 3.862.0 + '@smithy/core': 3.8.0 + '@smithy/protocol-http': 5.1.3 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@aws-sdk/nested-clients@3.828.0': dependencies: '@aws-crypto/sha256-browser': 5.2.0 @@ -11909,6 +12488,49 @@ snapshots: transitivePeerDependencies: - aws-crt + '@aws-sdk/nested-clients@3.864.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/core': 3.864.0 + '@aws-sdk/middleware-host-header': 3.862.0 + '@aws-sdk/middleware-logger': 3.862.0 + '@aws-sdk/middleware-recursion-detection': 3.862.0 + '@aws-sdk/middleware-user-agent': 3.864.0 + '@aws-sdk/region-config-resolver': 3.862.0 + '@aws-sdk/types': 3.862.0 + '@aws-sdk/util-endpoints': 3.862.0 + '@aws-sdk/util-user-agent-browser': 3.862.0 + '@aws-sdk/util-user-agent-node': 3.864.0 + '@smithy/config-resolver': 4.1.5 + '@smithy/core': 3.8.0 + 
'@smithy/fetch-http-handler': 5.1.1 + '@smithy/hash-node': 4.0.5 + '@smithy/invalid-dependency': 4.0.5 + '@smithy/middleware-content-length': 4.0.5 + '@smithy/middleware-endpoint': 4.1.18 + '@smithy/middleware-retry': 4.1.19 + '@smithy/middleware-serde': 4.0.9 + '@smithy/middleware-stack': 4.0.5 + '@smithy/node-config-provider': 4.1.4 + '@smithy/node-http-handler': 4.1.1 + '@smithy/protocol-http': 5.1.3 + '@smithy/smithy-client': 4.4.10 + '@smithy/types': 4.3.2 + '@smithy/url-parser': 4.0.5 + '@smithy/util-base64': 4.0.0 + '@smithy/util-body-length-browser': 4.0.0 + '@smithy/util-body-length-node': 4.0.0 + '@smithy/util-defaults-mode-browser': 4.0.26 + '@smithy/util-defaults-mode-node': 4.0.26 + '@smithy/util-endpoints': 3.0.7 + '@smithy/util-middleware': 4.0.5 + '@smithy/util-retry': 4.0.7 + '@smithy/util-utf8': 4.0.0 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + '@aws-sdk/region-config-resolver@3.821.0': dependencies: '@aws-sdk/types': 3.821.0 @@ -11918,6 +12540,15 @@ snapshots: '@smithy/util-middleware': 4.0.4 tslib: 2.8.1 + '@aws-sdk/region-config-resolver@3.862.0': + dependencies: + '@aws-sdk/types': 3.862.0 + '@smithy/node-config-provider': 4.1.4 + '@smithy/types': 4.3.2 + '@smithy/util-config-provider': 4.0.0 + '@smithy/util-middleware': 4.0.5 + tslib: 2.8.1 + '@aws-sdk/token-providers@3.828.0': dependencies: '@aws-sdk/core': 3.826.0 @@ -11930,11 +12561,28 @@ snapshots: transitivePeerDependencies: - aws-crt + '@aws-sdk/token-providers@3.864.0': + dependencies: + '@aws-sdk/core': 3.864.0 + '@aws-sdk/nested-clients': 3.864.0 + '@aws-sdk/types': 3.862.0 + '@smithy/property-provider': 4.0.5 + '@smithy/shared-ini-file-loader': 4.0.5 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + '@aws-sdk/types@3.821.0': dependencies: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@aws-sdk/types@3.862.0': + dependencies: + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@aws-sdk/util-endpoints@3.828.0': dependencies: '@aws-sdk/types': 3.821.0 @@ 
-11942,6 +12590,14 @@ snapshots: '@smithy/util-endpoints': 3.0.6 tslib: 2.8.1 + '@aws-sdk/util-endpoints@3.862.0': + dependencies: + '@aws-sdk/types': 3.862.0 + '@smithy/types': 4.3.2 + '@smithy/url-parser': 4.0.5 + '@smithy/util-endpoints': 3.0.7 + tslib: 2.8.1 + '@aws-sdk/util-locate-window@3.804.0': dependencies: tslib: 2.8.1 @@ -11953,6 +12609,13 @@ snapshots: bowser: 2.11.0 tslib: 2.8.1 + '@aws-sdk/util-user-agent-browser@3.862.0': + dependencies: + '@aws-sdk/types': 3.862.0 + '@smithy/types': 4.3.2 + bowser: 2.11.0 + tslib: 2.8.1 + '@aws-sdk/util-user-agent-node@3.828.0': dependencies: '@aws-sdk/middleware-user-agent': 3.828.0 @@ -11961,11 +12624,24 @@ snapshots: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@aws-sdk/util-user-agent-node@3.864.0': + dependencies: + '@aws-sdk/middleware-user-agent': 3.864.0 + '@aws-sdk/types': 3.862.0 + '@smithy/node-config-provider': 4.1.4 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@aws-sdk/xml-builder@3.821.0': dependencies: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@aws-sdk/xml-builder@3.862.0': + dependencies: + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@babel/code-frame@7.27.1': dependencies: '@babel/helper-validator-identifier': 7.27.1 @@ -15095,6 +15771,11 @@ snapshots: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/abort-controller@4.0.5': + dependencies: + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/config-resolver@4.1.4': dependencies: '@smithy/node-config-provider': 4.1.3 @@ -15103,6 +15784,14 @@ snapshots: '@smithy/util-middleware': 4.0.4 tslib: 2.8.1 + '@smithy/config-resolver@4.1.5': + dependencies: + '@smithy/node-config-provider': 4.1.4 + '@smithy/types': 4.3.2 + '@smithy/util-config-provider': 4.0.0 + '@smithy/util-middleware': 4.0.5 + tslib: 2.8.1 + '@smithy/core@3.5.3': dependencies: '@smithy/middleware-serde': 4.0.8 @@ -15115,6 +15804,20 @@ snapshots: '@smithy/util-utf8': 4.0.0 tslib: 2.8.1 + '@smithy/core@3.8.0': + dependencies: + '@smithy/middleware-serde': 4.0.9 + '@smithy/protocol-http': 5.1.3 + 
'@smithy/types': 4.3.2 + '@smithy/util-base64': 4.0.0 + '@smithy/util-body-length-browser': 4.0.0 + '@smithy/util-middleware': 4.0.5 + '@smithy/util-stream': 4.2.4 + '@smithy/util-utf8': 4.0.0 + '@types/uuid': 9.0.8 + tslib: 2.8.1 + uuid: 9.0.1 + '@smithy/credential-provider-imds@4.0.6': dependencies: '@smithy/node-config-provider': 4.1.3 @@ -15123,6 +15826,14 @@ snapshots: '@smithy/url-parser': 4.0.4 tslib: 2.8.1 + '@smithy/credential-provider-imds@4.0.7': + dependencies: + '@smithy/node-config-provider': 4.1.4 + '@smithy/property-provider': 4.0.5 + '@smithy/types': 4.3.2 + '@smithy/url-parser': 4.0.5 + tslib: 2.8.1 + '@smithy/fetch-http-handler@5.0.4': dependencies: '@smithy/protocol-http': 5.1.2 @@ -15131,6 +15842,14 @@ snapshots: '@smithy/util-base64': 4.0.0 tslib: 2.8.1 + '@smithy/fetch-http-handler@5.1.1': + dependencies: + '@smithy/protocol-http': 5.1.3 + '@smithy/querystring-builder': 4.0.5 + '@smithy/types': 4.3.2 + '@smithy/util-base64': 4.0.0 + tslib: 2.8.1 + '@smithy/hash-node@4.0.4': dependencies: '@smithy/types': 4.3.1 @@ -15138,11 +15857,23 @@ snapshots: '@smithy/util-utf8': 4.0.0 tslib: 2.8.1 + '@smithy/hash-node@4.0.5': + dependencies: + '@smithy/types': 4.3.2 + '@smithy/util-buffer-from': 4.0.0 + '@smithy/util-utf8': 4.0.0 + tslib: 2.8.1 + '@smithy/invalid-dependency@4.0.4': dependencies: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/invalid-dependency@4.0.5': + dependencies: + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/is-array-buffer@2.2.0': dependencies: tslib: 2.8.1 @@ -15157,6 +15888,12 @@ snapshots: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/middleware-content-length@4.0.5': + dependencies: + '@smithy/protocol-http': 5.1.3 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/middleware-endpoint@4.1.11': dependencies: '@smithy/core': 3.5.3 @@ -15168,6 +15905,17 @@ snapshots: '@smithy/util-middleware': 4.0.4 tslib: 2.8.1 + '@smithy/middleware-endpoint@4.1.18': + dependencies: + '@smithy/core': 3.8.0 + '@smithy/middleware-serde': 4.0.9 
+ '@smithy/node-config-provider': 4.1.4 + '@smithy/shared-ini-file-loader': 4.0.5 + '@smithy/types': 4.3.2 + '@smithy/url-parser': 4.0.5 + '@smithy/util-middleware': 4.0.5 + tslib: 2.8.1 + '@smithy/middleware-retry@4.1.12': dependencies: '@smithy/node-config-provider': 4.1.3 @@ -15180,17 +15928,41 @@ snapshots: tslib: 2.8.1 uuid: 9.0.1 + '@smithy/middleware-retry@4.1.19': + dependencies: + '@smithy/node-config-provider': 4.1.4 + '@smithy/protocol-http': 5.1.3 + '@smithy/service-error-classification': 4.0.7 + '@smithy/smithy-client': 4.4.10 + '@smithy/types': 4.3.2 + '@smithy/util-middleware': 4.0.5 + '@smithy/util-retry': 4.0.7 + '@types/uuid': 9.0.8 + tslib: 2.8.1 + uuid: 9.0.1 + '@smithy/middleware-serde@4.0.8': dependencies: '@smithy/protocol-http': 5.1.2 '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/middleware-serde@4.0.9': + dependencies: + '@smithy/protocol-http': 5.1.3 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/middleware-stack@4.0.4': dependencies: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/middleware-stack@4.0.5': + dependencies: + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/node-config-provider@4.1.3': dependencies: '@smithy/property-provider': 4.0.4 @@ -15198,6 +15970,13 @@ snapshots: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/node-config-provider@4.1.4': + dependencies: + '@smithy/property-provider': 4.0.5 + '@smithy/shared-ini-file-loader': 4.0.5 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/node-http-handler@4.0.6': dependencies: '@smithy/abort-controller': 4.0.4 @@ -15206,36 +15985,74 @@ snapshots: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/node-http-handler@4.1.1': + dependencies: + '@smithy/abort-controller': 4.0.5 + '@smithy/protocol-http': 5.1.3 + '@smithy/querystring-builder': 4.0.5 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/property-provider@4.0.4': dependencies: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/property-provider@4.0.5': + dependencies: + '@smithy/types': 4.3.2 + tslib: 2.8.1 + 
'@smithy/protocol-http@5.1.2': dependencies: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/protocol-http@5.1.3': + dependencies: + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/querystring-builder@4.0.4': dependencies: '@smithy/types': 4.3.1 '@smithy/util-uri-escape': 4.0.0 tslib: 2.8.1 + '@smithy/querystring-builder@4.0.5': + dependencies: + '@smithy/types': 4.3.2 + '@smithy/util-uri-escape': 4.0.0 + tslib: 2.8.1 + '@smithy/querystring-parser@4.0.4': dependencies: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/querystring-parser@4.0.5': + dependencies: + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/service-error-classification@4.0.5': dependencies: '@smithy/types': 4.3.1 + '@smithy/service-error-classification@4.0.7': + dependencies: + '@smithy/types': 4.3.2 + '@smithy/shared-ini-file-loader@4.0.4': dependencies: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/shared-ini-file-loader@4.0.5': + dependencies: + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/signature-v4@5.1.2': dependencies: '@smithy/is-array-buffer': 4.0.0 @@ -15247,6 +16064,27 @@ snapshots: '@smithy/util-utf8': 4.0.0 tslib: 2.8.1 + '@smithy/signature-v4@5.1.3': + dependencies: + '@smithy/is-array-buffer': 4.0.0 + '@smithy/protocol-http': 5.1.3 + '@smithy/types': 4.3.2 + '@smithy/util-hex-encoding': 4.0.0 + '@smithy/util-middleware': 4.0.5 + '@smithy/util-uri-escape': 4.0.0 + '@smithy/util-utf8': 4.0.0 + tslib: 2.8.1 + + '@smithy/smithy-client@4.4.10': + dependencies: + '@smithy/core': 3.8.0 + '@smithy/middleware-endpoint': 4.1.18 + '@smithy/middleware-stack': 4.0.5 + '@smithy/protocol-http': 5.1.3 + '@smithy/types': 4.3.2 + '@smithy/util-stream': 4.2.4 + tslib: 2.8.1 + '@smithy/smithy-client@4.4.3': dependencies: '@smithy/core': 3.5.3 @@ -15261,12 +16099,22 @@ snapshots: dependencies: tslib: 2.8.1 + '@smithy/types@4.3.2': + dependencies: + tslib: 2.8.1 + '@smithy/url-parser@4.0.4': dependencies: '@smithy/querystring-parser': 4.0.4 '@smithy/types': 4.3.1 tslib: 2.8.1 + 
'@smithy/url-parser@4.0.5': + dependencies: + '@smithy/querystring-parser': 4.0.5 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/util-base64@4.0.0': dependencies: '@smithy/util-buffer-from': 4.0.0 @@ -15303,6 +16151,14 @@ snapshots: bowser: 2.11.0 tslib: 2.8.1 + '@smithy/util-defaults-mode-browser@4.0.26': + dependencies: + '@smithy/property-provider': 4.0.5 + '@smithy/smithy-client': 4.4.10 + '@smithy/types': 4.3.2 + bowser: 2.11.0 + tslib: 2.8.1 + '@smithy/util-defaults-mode-node@4.0.19': dependencies: '@smithy/config-resolver': 4.1.4 @@ -15313,12 +16169,28 @@ snapshots: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/util-defaults-mode-node@4.0.26': + dependencies: + '@smithy/config-resolver': 4.1.5 + '@smithy/credential-provider-imds': 4.0.7 + '@smithy/node-config-provider': 4.1.4 + '@smithy/property-provider': 4.0.5 + '@smithy/smithy-client': 4.4.10 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/util-endpoints@3.0.6': dependencies: '@smithy/node-config-provider': 4.1.3 '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/util-endpoints@3.0.7': + dependencies: + '@smithy/node-config-provider': 4.1.4 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/util-hex-encoding@4.0.0': dependencies: tslib: 2.8.1 @@ -15328,12 +16200,23 @@ snapshots: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/util-middleware@4.0.5': + dependencies: + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/util-retry@4.0.5': dependencies: '@smithy/service-error-classification': 4.0.5 '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/util-retry@4.0.7': + dependencies: + '@smithy/service-error-classification': 4.0.7 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@smithy/util-stream@4.2.2': dependencies: '@smithy/fetch-http-handler': 5.0.4 @@ -15345,6 +16228,17 @@ snapshots: '@smithy/util-utf8': 4.0.0 tslib: 2.8.1 + '@smithy/util-stream@4.2.4': + dependencies: + '@smithy/fetch-http-handler': 5.1.1 + '@smithy/node-http-handler': 4.1.1 + '@smithy/types': 4.3.2 + '@smithy/util-base64': 4.0.0 + 
'@smithy/util-buffer-from': 4.0.0 + '@smithy/util-hex-encoding': 4.0.0 + '@smithy/util-utf8': 4.0.0 + tslib: 2.8.1 + '@smithy/util-uri-escape@4.0.0': dependencies: tslib: 2.8.1 @@ -15365,6 +16259,12 @@ snapshots: '@smithy/types': 4.3.1 tslib: 2.8.1 + '@smithy/util-waiter@4.0.7': + dependencies: + '@smithy/abort-controller': 4.0.5 + '@smithy/types': 4.3.2 + tslib: 2.8.1 + '@socket.io/component-emitter@3.1.2': {} '@swc/core-darwin-arm64@1.3.101': @@ -17273,6 +18173,25 @@ snapshots: - '@types/react' - '@types/react-dom' + cohere-ai@7.18.1(encoding@0.1.13): + dependencies: + '@aws-sdk/client-sagemaker': 3.870.0 + '@aws-sdk/credential-providers': 3.864.0 + '@smithy/protocol-http': 5.1.2 + '@smithy/signature-v4': 5.1.2 + convict: 6.2.4 + form-data: 4.0.3 + form-data-encoder: 4.1.0 + formdata-node: 6.0.3 + js-base64: 3.7.7 + node-fetch: 2.7.0(encoding@0.1.13) + qs: 6.14.0 + readable-stream: 4.7.0 + url-join: 4.0.1 + transitivePeerDependencies: + - aws-crt + - encoding + color-convert@1.9.3: dependencies: color-name: 1.1.3 @@ -17362,6 +18281,11 @@ snapshots: convert-source-map@2.0.0: {} + convict@6.2.4: + dependencies: + lodash.clonedeep: 4.5.0 + yargs-parser: 20.2.9 + cookie-signature@1.0.6: {} cookie-signature@1.2.2: {} @@ -18789,6 +19713,10 @@ snapshots: dependencies: strnum: 1.1.2 + fast-xml-parser@5.2.5: + dependencies: + strnum: 2.1.1 + fastq@1.19.1: dependencies: reusify: 1.1.0 @@ -18884,6 +19812,8 @@ snapshots: cross-spawn: 7.0.6 signal-exit: 4.1.0 + form-data-encoder@4.1.0: {} + form-data@4.0.3: dependencies: asynckit: 0.4.0 @@ -18894,6 +19824,8 @@ snapshots: format@0.2.2: {} + formdata-node@6.0.3: {} + forwarded@0.2.0: {} fraction.js@4.3.7: {} @@ -19629,6 +20561,8 @@ snapshots: joycon@3.1.1: {} + js-base64@3.7.7: {} + js-beautify@1.15.4: dependencies: config-chain: 1.1.13 @@ -19872,6 +20806,8 @@ snapshots: lodash.castarray@4.4.0: {} + lodash.clonedeep@4.5.0: {} + lodash.debounce@4.0.8: {} lodash.defaults@4.2.0: {} @@ -21571,6 +22507,8 @@ snapshots: 
process-nextick-args@2.0.1: {} + process@0.11.10: {} + prom-client@15.1.3: dependencies: '@opentelemetry/api': 1.9.0 @@ -22074,6 +23012,14 @@ snapshots: string_decoder: 1.3.0 util-deprecate: 1.0.2 + readable-stream@4.7.0: + dependencies: + abort-controller: 3.0.0 + buffer: 6.0.3 + events: 3.3.0 + process: 0.11.10 + string_decoder: 1.3.0 + readdirp@3.6.0: dependencies: picomatch: 2.3.1 @@ -22833,6 +23779,8 @@ snapshots: strnum@1.1.2: {} + strnum@2.1.1: {} + style-to-js@1.1.17: dependencies: style-to-object: 1.0.9 @@ -23486,6 +24434,8 @@ snapshots: dependencies: punycode: 2.3.1 + url-join@4.0.1: {} + use-callback-ref@1.3.3(@types/react@18.2.47)(react@18.2.0): dependencies: react: 18.2.0 @@ -23900,6 +24850,8 @@ snapshots: camelcase: 5.3.1 decamelize: 1.2.0 + yargs-parser@20.2.9: {} + yargs-parser@21.1.1: {} yargs@15.4.1: