import React, { useState } from 'react'; import { Chart as ChartJS, CategoryScale, LinearScale, BarElement, Title, Tooltip, Legend, } from 'chart.js'; import annotationPlugin from 'chartjs-plugin-annotation'; import { Bar } from 'react-chartjs-2'; // Register Chart.js components ChartJS.register( CategoryScale, LinearScale, BarElement, Title, Tooltip, Legend, annotationPlugin ); const DebugTab = ({ debugData }) => { const [activeSection, setActiveSection] = useState('overview'); const [sortBy, setSortBy] = useState('similarity'); const [sortDirection, setSortDirection] = useState('desc'); const [wordPoolView, setWordPoolView] = useState('all'); // 'all' or 'candidates' if (!debugData || !debugData.enabled) { return (

Debug data not available. Set ENABLE_DEBUG_TAB=true on the backend.

); } const sections = [ { id: 'overview', label: 'Overview' }, { id: 'word-pool', label: 'Word Pool' }, { id: 'probability-analysis', label: 'Probability Analysis' }, { id: 'final-selection', label: 'Final Selection' } ]; const renderOverview = () => (

Generation Parameters

Topics: {debugData.generation_params.topics.join(', ')}

Difficulty: {debugData.generation_params.difficulty}

Requested Words: {debugData.generation_params.requested_words}

Thematic Pool Size: {debugData.generation_params.thematic_pool_size}

Min Similarity: {debugData.generation_params.min_similarity}

Multi-theme: {debugData.generation_params.multi_theme ? 'Yes' : 'No'}

{debugData.generation_params.multi_topic_method && (

Multi-Topic Method: {debugData.generation_params.multi_topic_method}

)} {debugData.generation_params.soft_min_beta && (

Soft Min Beta: {debugData.generation_params.soft_min_beta}

)} {debugData.generation_params.custom_sentence && (

Custom Sentence: "{debugData.generation_params.custom_sentence}"

)}

Selection Algorithm

Method: {debugData.selection_method}

Temperature: {debugData.selection_params.similarity_temperature} (lower = more deterministic)

Difficulty Weight: {debugData.selection_params.difficulty_weight} (balance between similarity and frequency)

Use Softmax: {debugData.selection_params.use_softmax_selection ? 'Yes' : 'No'}

How it works:

Composite Score = (1 - difficulty_weight) × similarity + difficulty_weight × frequency_alignment
Frequency Alignment: Gaussian distribution favoring target percentiles by difficulty
Softmax Selection: Probabilistic selection based on composite scores with temperature control
Multi-Topic Similarity: Uses {debugData.generation_params.multi_topic_method} method to find words relevant to ALL topics

Difficulty Targets:

Easy: 90th percentile (common words like CAT, DOG)
Medium: 50th percentile (balanced selection)
Hard: 20th percentile (rare words like QUETZAL, PLATYPUS)

Results Summary

Thematic Pool: {debugData.thematic_pool?.length || 0} words

Candidates: {debugData.candidate_words?.length || 0} words

Selected: {debugData.selected_words?.length || 0} words

); const renderWordTable = (words, showClue = false) => (

{showClue && } {words.map((word, idx) => ( {showClue && } ))}

Word	Similarity	Percentile	Tier	Clue
{word.word}	{word.similarity.toFixed(3)}	{word.percentile.toFixed(3)}	{word.tier.replace('tier_', '').replace('_', ' ')}	{word.clue}

); const handleSort = (column) => { if (sortBy === column) { setSortDirection(sortDirection === 'asc' ? 'desc' : 'asc'); } else { setSortBy(column); setSortDirection(column === 'word' ? 'asc' : 'desc'); } }; const getSortIcon = (column) => { if (sortBy !== column) return ' ↕️'; return sortDirection === 'asc' ? ' ▲' : ' ▼'; }; const renderSortableThematicPool = () => { const pool = debugData.thematic_pool || []; const sortedPool = [...pool].sort((a, b) => { let aVal, bVal; switch (sortBy) { case 'word': aVal = a.word.toLowerCase(); bVal = b.word.toLowerCase(); break; case 'similarity': aVal = a.similarity; bVal = b.similarity; break; case 'percentile': aVal = a.percentile; bVal = b.percentile; break; default: aVal = a.similarity; bVal = b.similarity; } if (sortDirection === 'asc') { return aVal < bVal ? -1 : aVal > bVal ? 1 : 0; } else { return aVal > bVal ? -1 : aVal < bVal ? 1 : 0; } }); return (

{sortedPool.map((word, idx) => ( ))}

handleSort('word')} style={{ cursor: 'pointer', userSelect: 'none' }} className={sortBy === 'word' ? 'sorted-column' : ''} > Word{getSortIcon('word')}	handleSort('similarity')} style={{ cursor: 'pointer', userSelect: 'none' }} className={sortBy === 'similarity' ? 'sorted-column' : ''} title={debugData.generation_params.multi_topic_method ? `Multi-Topic Similarity (${debugData.generation_params.multi_topic_method}): Score representing relevance to ALL topics simultaneously. ${debugData.generation_params.multi_topic_method === 'soft_minimum' ? 'Uses soft minimum aggregation (β=' + debugData.generation_params.soft_min_beta + ') - high scores mean the word relates well to every selected topic.' : 'Aggregated across all topics.'}` : 'Similarity: Semantic similarity score to the selected topic (0.0 to 1.0)' } > Similarity{getSortIcon('similarity')}	handleSort('percentile')} style={{ cursor: 'pointer', userSelect: 'none' }} className={sortBy === 'percentile' ? 'sorted-column' : ''} > Percentile{getSortIcon('percentile')}	Tier
{word.word}	{word.similarity.toFixed(3)}	{word.percentile.toFixed(3)}	{word.tier.replace('tier_', '').replace('_', ' ')}

); }; const renderThematicPool = () => { const pool = debugData.thematic_pool || []; return (

Thematic Pool ({pool.length} words)

All words generated thematically. Click column headers to sort.

{renderSortableThematicPool()}

); }; const renderCandidates = () => { const candidates = debugData.candidate_words || []; return (

Candidate Words ({candidates.length} words)

Words that passed filtering and got clues generated.

{renderWordTable(candidates, true)}

); }; const renderWordPool = () => { const pool = debugData.thematic_pool || []; const candidates = debugData.candidate_words || []; const isAllView = wordPoolView === 'all'; const currentData = isAllView ? pool : candidates; return (

Word Pool

{/* Toggle buttons */}

{isAllView ? 'All words generated thematically. Click column headers to sort.' : 'Words that passed filtering and got clues generated.' }

{isAllView ? renderSortableThematicPool() : renderWordTable(candidates, true)}

); }; const renderSelection = () => (

Selection Process

Algorithm: {debugData.selection_method}

Temperature: {debugData.selection_params.similarity_temperature} (lower = more deterministic)

Difficulty Weight: {debugData.selection_params.difficulty_weight} (balance between similarity and frequency)

How it works:

Composite Score = (1 - difficulty_weight) × similarity + difficulty_weight × frequency_alignment
Frequency Alignment: Gaussian distribution favoring target percentiles by difficulty
Softmax Selection: Probabilistic selection based on composite scores with temperature control
Multi-Topic Similarity: Uses {debugData.generation_params.multi_topic_method} method to find words relevant to ALL topics

Difficulty Targets:

Easy: 90th percentile (common words like CAT, DOG)
Medium: 50th percentile (balanced selection)
Hard: 20th percentile (rare words like QUETZAL, PLATYPUS)

); const renderProbabilities = () => { const probData = debugData.probability_distribution; if (!probData || !probData.probabilities) { return (

Probability Distribution

Probability data not available (only shown with softmax selection).

); } try { const probabilities = probData.probabilities; // Sort by percentile (descending) to show 100% -> 0% left to right const sortedByPercentile = [...probabilities].sort((a, b) => b.percentile - a.percentile); // Calculate distribution statistics based on position in sorted array const mean = sortedByPercentile.reduce((sum, p, i) => sum + (p.probability || 0) * i, 0); const variance = sortedByPercentile.reduce((sum, p, i) => sum + (p.probability || 0) * Math.pow(i - mean, 2), 0); const sigma = Math.sqrt(Math.max(0, variance)); // Ensure no negative variance const meanWordIndex = Math.max(0, Math.min(sortedByPercentile.length - 1, Math.round(mean))); const sigmaRangeStart = Math.max(0, Math.round(mean - sigma)); const sigmaRangeEnd = Math.min(sortedByPercentile.length - 1, Math.round(mean + sigma)); // Calculate sampling statistics with bounds checking const sigmaRangeProbMass = sortedByPercentile .slice(sigmaRangeStart, sigmaRangeEnd + 1) .reduce((sum, p) => sum + (p.probability || 0), 0); // Prepare chart data - sorted by percentile to reveal Gaussian targeting const chartData = { labels: sortedByPercentile.map(p => `${p.word}\n(${(p.percentile * 100).toFixed(0)}%)`), datasets: [ { label: 'Selection Probability (%)', data: sortedByPercentile.map(p => p.probability * 100), backgroundColor: sortedByPercentile.map(p => p.selected ? 'rgba(76, 175, 80, 0.8)' : 'rgba(158, 158, 158, 0.6)' ), borderColor: sortedByPercentile.map(p => p.selected ? 'rgba(76, 175, 80, 1)' : 'rgba(158, 158, 158, 0.8)' ), borderWidth: 2 } ] }; const chartOptions = { responsive: true, maintainAspectRatio: false, plugins: { legend: { display: false }, title: { display: true, text: `Probability Distribution by Frequency Percentile (Temperature: ${probData.temperature})`, font: { size: 16, weight: 'bold' } }, tooltip: { callbacks: { title: function(context) { const item = sortedByPercentile[context[0].dataIndex]; return `${item.word} ${item.selected ? '✓ SELECTED' : ''}`; }, label: function(context) { const item = sortedByPercentile[context.dataIndex]; const labels = [ `Probability: ${(item.probability * 100).toFixed(2)}%`, `Composite Score: ${item.composite_score.toFixed(3)}`, `Similarity: ${item.similarity.toFixed(3)}`, `Percentile: ${(item.percentile * 100).toFixed(1)}%`, `Tier: ${item.tier.replace('tier_', '').replace('_', ' ')}` ]; // Add normalization data if available if (item.normalization_applied && item.original_composite_score !== undefined) { labels.splice(2, 0, `Original Score: ${item.original_composite_score.toFixed(3)}`); labels.splice(3, 0, `🎯 Normalized: ${item.normalization_method}`); } return labels; } }, backgroundColor: 'rgba(0, 0, 0, 0.8)', titleColor: 'white', bodyColor: 'white', borderColor: 'rgba(255, 255, 255, 0.3)', borderWidth: 1 } }, scales: { x: { title: { display: true, text: 'Words (sorted by frequency percentile: 100% → 0%)', font: { size: 14, weight: 'bold' } }, ticks: { maxRotation: 45, minRotation: 45, font: { size: 11, weight: 'bold' } } }, y: { title: { display: true, text: 'Selection Probability (%)', font: { size: 14, weight: 'bold' } }, beginAtZero: true, ticks: { callback: function(value) { return value.toFixed(1) + '%'; } } } }, interaction: { intersect: false, mode: 'index' } }; // Configure all plugins including annotation const chartOptionsWithAnnotations = { ...chartOptions, plugins: { legend: { display: false }, title: { display: true, text: `Probability Distribution by Frequency Percentile (Temperature: ${probData.temperature})`, font: { size: 16, weight: 'bold' } }, tooltip: { callbacks: { title: function(context) { const item = sortedByPercentile[context[0].dataIndex]; return `${item.word} ${item.selected ? '✓ SELECTED' : ''}`; }, label: function(context) { const item = sortedByPercentile[context.dataIndex]; const labels = [ `Probability: ${(item.probability * 100).toFixed(2)}%`, `Composite Score: ${item.composite_score.toFixed(3)}`, `Similarity: ${item.similarity.toFixed(3)}`, `Percentile: ${(item.percentile * 100).toFixed(1)}%`, `Tier: ${item.tier.replace('tier_', '').replace('_', ' ')}` ]; // Add normalization data if available if (item.normalization_applied && item.original_composite_score !== undefined) { labels.splice(2, 0, `Original Score: ${item.original_composite_score.toFixed(3)}`); labels.splice(3, 0, `🎯 Normalized: ${item.normalization_method}`); } return labels; } }, backgroundColor: 'rgba(0, 0, 0, 0.8)', titleColor: 'white', bodyColor: 'white', borderColor: 'rgba(255, 255, 255, 0.3)', borderWidth: 1 }, annotation: { annotations: { meanLine: { type: 'line', xMin: meanWordIndex, xMax: meanWordIndex, borderColor: 'rgba(255, 99, 132, 0.8)', borderWidth: 3, borderDash: [5, 5], label: { display: true, content: 'μ', position: 'start', backgroundColor: 'rgba(255, 99, 132, 0.8)', color: 'white', font: { weight: 'bold', size: 12 } } }, sigmaBox: { type: 'box', xMin: sigmaRangeStart, xMax: sigmaRangeEnd, backgroundColor: 'rgba(54, 162, 235, 0.15)', borderColor: 'rgba(54, 162, 235, 0.5)', borderWidth: 2, label: { display: true, content: `σ (${(sigmaRangeProbMass * 100).toFixed(1)}%)`, position: 'center', backgroundColor: 'rgba(54, 162, 235, 0.8)', color: 'white', font: { weight: 'bold', size: 11 } } }, sigmaStartLine: { type: 'line', xMin: sigmaRangeStart, xMax: sigmaRangeStart, borderColor: 'rgba(54, 162, 235, 0.8)', borderWidth: 2, borderDash: [3, 3], label: { display: true, content: 'μ-σ', position: 'start', backgroundColor: 'rgba(54, 162, 235, 0.6)', color: 'white', font: { size: 10 } } }, sigmaEndLine: { type: 'line', xMin: sigmaRangeEnd, xMax: sigmaRangeEnd, borderColor: 'rgba(54, 162, 235, 0.8)', borderWidth: 2, borderDash: [3, 3], label: { display: true, content: 'μ+σ', position: 'start', backgroundColor: 'rgba(54, 162, 235, 0.6)', color: 'white', font: { size: 10 } } } } } } }; return (

Probability Distribution ({probData.total_candidates} candidates)

Selection probabilities from softmax algorithm (temperature: {probData.temperature}, difficulty: {probData.difficulty})

Selected: {probData.selected_count} words

Top Probability: {(Math.max(...sortedByPercentile.map(p => p.probability)) * 100).toFixed(1)}%

Average: {((1/probData.total_candidates) * 100).toFixed(1)}%

Temperature Effect: {probData.temperature < 1 ? 'More deterministic' : probData.temperature > 1 ? 'More random' : 'Balanced'}

{probData.normalization_enabled && (

🎯 Distribution Normalization: ENABLED ({probData.normalization_method})

)}

Mean Position: Word #{meanWordIndex + 1} ({sortedByPercentile[meanWordIndex]?.word})

Distribution Width (σ): {sigma.toFixed(1)} words

σ Sampling Zone: {(sigmaRangeProbMass * 100).toFixed(1)}% of probability mass

σ Range: Words #{sigmaRangeStart + 1}-#{sigmaRangeEnd + 1}

{/* Interactive Bar Chart */}

📊 Frequency-Based Analysis: This chart shows ALL {probData.total_candidates} candidate words sorted by frequency percentile (100% → 0%, common → rare). This reveals whether the Gaussian frequency targeting is working correctly for your selected difficulty level. Look for probability peaks at the intended percentile ranges: Easy (90%+), Medium (50%), Hard (20%). {probData.normalization_enabled && ( <> 🎯 Distribution normalization is ENABLED to ensure consistent difficulty across topics. )}

{/* Detailed Table */}

Detailed Probability Data

{sortedByPercentile.map((item, idx) => ( ))}

Rank	Word	Probability	Composite	Similarity	Percentile	Selected
{item.probability_rank}	{item.word}	{(item.probability * 100).toFixed(2)}%	{item.composite_score.toFixed(3)}	{item.similarity.toFixed(3)}	{item.percentile.toFixed(3)}	{item.selected ? '✓' : '✗'}

); } catch (error) { console.error('Error rendering probabilities:', error); return (

Probability Distribution

Error rendering chart: {error.message}

Debug data available: {JSON.stringify(Object.keys(probData || {}))}

); } }; const renderSelected = () => { const selected = debugData.selected_words || []; return (

Selected Words ({selected.length} words)

Final words chosen for crossword generation.

{renderWordTable(selected, true)}

); }; const renderSection = () => { switch (activeSection) { case 'overview': return renderOverview(); case 'word-pool': return renderWordPool(); case 'probability-analysis': return renderProbabilities(); case 'final-selection': return renderSelected(); default: return renderOverview(); } }; return (

{sections.map(section => ( ))}

{renderSection()}

); }; export default DebugTab;