// vimalk78's picture
// fix: clean up repository after removing LFS cache files
// 9cd7541
import React, { useState } from 'react';
import {
Chart as ChartJS,
CategoryScale,
LinearScale,
BarElement,
Title,
Tooltip,
Legend,
} from 'chart.js';
import annotationPlugin from 'chartjs-plugin-annotation';
import { Bar } from 'react-chartjs-2';
// Scales, bar element, title/tooltip/legend plugins and the annotation
// plugin that the bar chart in this file relies on.
const registeredChartParts = [
  CategoryScale,
  LinearScale,
  BarElement,
  Title,
  Tooltip,
  Legend,
  annotationPlugin,
];

// Register everything with Chart.js in a single call.
ChartJS.register(...registeredChartParts);
/**
 * Tabbed debug view over the payload attached to a word-generation run.
 *
 * @param {object} props
 * @param {object} props.debugData - Debug payload. This component reads
 *   `enabled`, `generation_params`, `selection_params`, `selection_method`,
 *   `thematic_pool`, `candidate_words`, `selected_words` and
 *   `probability_distribution` from it.
 * @returns {JSX.Element} The tab, or a short notice when the payload is
 *   missing or `enabled` is falsy.
 */
const DebugTab = ({ debugData }) => {
// Id of the section currently shown (one of the `sections` ids below).
const [activeSection, setActiveSection] = useState('overview');
// Sort column and direction for the sortable thematic-pool table.
const [sortBy, setSortBy] = useState('similarity');
const [sortDirection, setSortDirection] = useState('desc');
const [wordPoolView, setWordPoolView] = useState('all'); // 'all' or 'candidates'
// All hooks are called above this point, so returning early here keeps the
// hook call order identical on every render.
if (!debugData || !debugData.enabled) {
return (
<div className="debug-tab">
<p>Debug data not available. Set ENABLE_DEBUG_TAB=true on the backend.</p>
</div>
);
}
// Navigation entries; each `id` is matched by a case in `renderSection`.
const sections = [
{ id: 'overview', label: 'Overview' },
{ id: 'word-pool', label: 'Word Pool' },
{ id: 'probability-analysis', label: 'Probability Analysis' },
{ id: 'final-selection', label: 'Final Selection' }
];
const renderOverview = () => (
<div className="debug-section">
<h3>Generation Parameters</h3>
<div className="debug-grid">
<div><strong>Topics:</strong> {debugData.generation_params.topics.join(', ')}</div>
<div><strong>Difficulty:</strong> {debugData.generation_params.difficulty}</div>
<div><strong>Requested Words:</strong> {debugData.generation_params.requested_words}</div>
<div><strong>Thematic Pool Size:</strong> {debugData.generation_params.thematic_pool_size}</div>
<div><strong>Min Similarity:</strong> {debugData.generation_params.min_similarity}</div>
<div><strong>Multi-theme:</strong> {debugData.generation_params.multi_theme ? 'Yes' : 'No'}</div>
{debugData.generation_params.multi_topic_method && (
<div><strong>Multi-Topic Method:</strong> {debugData.generation_params.multi_topic_method}</div>
)}
{debugData.generation_params.soft_min_beta && (
<div><strong>Soft Min Beta:</strong> {debugData.generation_params.soft_min_beta}</div>
)}
{debugData.generation_params.custom_sentence && (
<div><strong>Custom Sentence:</strong> "{debugData.generation_params.custom_sentence}"</div>
)}
</div>
<h3>Selection Algorithm</h3>
<div className="debug-grid">
<div><strong>Method:</strong> {debugData.selection_method}</div>
<div><strong>Temperature:</strong> {debugData.selection_params.similarity_temperature} (lower = more deterministic)</div>
<div><strong>Difficulty Weight:</strong> {debugData.selection_params.difficulty_weight} (balance between similarity and frequency)</div>
<div><strong>Use Softmax:</strong> {debugData.selection_params.use_softmax_selection ? 'Yes' : 'No'}</div>
</div>
<h4>How it works:</h4>
<ul>
<li><strong>Composite Score</strong> = (1 - difficulty_weight) Γ— similarity + difficulty_weight Γ— frequency_alignment</li>
<li><strong>Frequency Alignment</strong>: Gaussian distribution favoring target percentiles by difficulty</li>
<li><strong>Softmax Selection</strong>: Probabilistic selection based on composite scores with temperature control</li>
{debugData.generation_params.multi_topic_method && (
<li><strong>Multi-Topic Similarity:</strong> Uses {debugData.generation_params.multi_topic_method} method to find words relevant to ALL topics</li>
)}
</ul>
<h4>Difficulty Targets:</h4>
<ul>
<li><strong>Easy:</strong> 90th percentile (common words like CAT, DOG)</li>
<li><strong>Medium:</strong> 50th percentile (balanced selection)</li>
<li><strong>Hard:</strong> 20th percentile (rare words like QUETZAL, PLATYPUS)</li>
</ul>
<h3>Results Summary</h3>
<div className="debug-grid">
<div><strong>Thematic Pool:</strong> {debugData.thematic_pool?.length || 0} words</div>
<div><strong>Candidates:</strong> {debugData.candidate_words?.length || 0} words</div>
<div><strong>Selected:</strong> {debugData.selected_words?.length || 0} words</div>
</div>
</div>
);
const renderWordTable = (words, showClue = false) => (
<div className="word-table-container">
<table className="word-table">
<thead>
<tr>
<th>Word</th>
<th>Similarity</th>
<th>Percentile</th>
<th>Tier</th>
{showClue && <th>Clue</th>}
</tr>
</thead>
<tbody>
{words.map((word, idx) => (
<tr key={idx}>
<td><strong>{word.word}</strong></td>
<td>{word.similarity.toFixed(3)}</td>
<td>{word.percentile.toFixed(3)}</td>
<td title={word.tier_description || word.tier}>{word.tier.replace('tier_', '').replace('_', ' ')}</td>
{showClue && <td>{word.clue}</td>}
</tr>
))}
</tbody>
</table>
</div>
);
// Header click handler for the sortable table: clicking the active column
// flips its direction; clicking another column selects it, starting
// ascending for 'word' and descending for every other column.
const handleSort = (column) => {
  const isActiveColumn = sortBy === column;
  if (isActiveColumn) {
    const flipped = sortDirection === 'asc' ? 'desc' : 'asc';
    setSortDirection(flipped);
    return;
  }

  setSortBy(column);
  const initialDirection = column === 'word' ? 'asc' : 'desc';
  setSortDirection(initialDirection);
};
// Suffix appended to a sortable header label: an up/down arrow for columns
// that are not the active sort column, otherwise a triangle pointing in the
// current sort direction. (The triangles were previously mis-encoded text.)
const getSortIcon = (column) => {
  if (sortBy !== column) return ' ↕️';
  return sortDirection === 'asc' ? ' ▲' : ' ▼';
};
const renderSortableThematicPool = () => {
const pool = debugData.thematic_pool || [];
const sortedPool = [...pool].sort((a, b) => {
let aVal, bVal;
switch (sortBy) {
case 'word':
aVal = a.word.toLowerCase();
bVal = b.word.toLowerCase();
break;
case 'similarity':
aVal = a.similarity;
bVal = b.similarity;
break;
case 'percentile':
aVal = a.percentile;
bVal = b.percentile;
break;
default:
aVal = a.similarity;
bVal = b.similarity;
}
if (sortDirection === 'asc') {
return aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
} else {
return aVal > bVal ? -1 : aVal < bVal ? 1 : 0;
}
});
return (
<div className="word-table-container">
<table className="word-table">
<thead>
<tr>
<th
onClick={() => handleSort('word')}
style={{ cursor: 'pointer', userSelect: 'none' }}
className={sortBy === 'word' ? 'sorted-column' : ''}
>
Word{getSortIcon('word')}
</th>
<th
onClick={() => handleSort('similarity')}
style={{ cursor: 'pointer', userSelect: 'none' }}
className={sortBy === 'similarity' ? 'sorted-column' : ''}
title={debugData.generation_params.multi_topic_method ?
`Multi-Topic Similarity (${debugData.generation_params.multi_topic_method}): Score representing relevance to ALL topics simultaneously. ${debugData.generation_params.multi_topic_method === 'soft_minimum' ? 'Uses soft minimum aggregation (Ξ²=' + debugData.generation_params.soft_min_beta + ') - high scores mean the word relates well to every selected topic.' : 'Aggregated across all topics.'}` :
'Similarity: Semantic similarity score to the selected topic (0.0 to 1.0)'
}
>
Similarity{getSortIcon('similarity')}
</th>
<th
onClick={() => handleSort('percentile')}
style={{ cursor: 'pointer', userSelect: 'none' }}
className={sortBy === 'percentile' ? 'sorted-column' : ''}
>
Percentile{getSortIcon('percentile')}
</th>
<th>Tier</th>
</tr>
</thead>
<tbody>
{sortedPool.map((word, idx) => (
<tr key={idx}>
<td><strong>{word.word}</strong></td>
<td>{word.similarity.toFixed(3)}</td>
<td>{word.percentile.toFixed(3)}</td>
<td title={word.tier_description || word.tier}>{word.tier.replace('tier_', '').replace('_', ' ')}</td>
</tr>
))}
</tbody>
</table>
</div>
);
};
const renderThematicPool = () => {
const pool = debugData.thematic_pool || [];
return (
<div className="debug-section">
<h3>Thematic Pool ({pool.length} words)</h3>
<p>All words generated thematically. Click column headers to sort.</p>
{renderSortableThematicPool()}
</div>
);
};
const renderCandidates = () => {
const candidates = debugData.candidate_words || [];
return (
<div className="debug-section">
<h3>Candidate Words ({candidates.length} words)</h3>
<p>Words that passed filtering and got clues generated.</p>
{renderWordTable(candidates, true)}
</div>
);
};
const renderWordPool = () => {
const pool = debugData.thematic_pool || [];
const candidates = debugData.candidate_words || [];
const isAllView = wordPoolView === 'all';
const currentData = isAllView ? pool : candidates;
return (
<div className="debug-section">
<h3>Word Pool</h3>
{/* Toggle buttons */}
<div className="pool-view-toggle" style={{ marginBottom: '15px' }}>
<button
className={`toggle-btn ${isAllView ? 'active' : ''}`}
onClick={() => setWordPoolView('all')}
style={{
padding: '8px 16px',
marginRight: '8px',
backgroundColor: isAllView ? '#4CAF50' : '#f0f0f0',
color: isAllView ? 'white' : '#333',
border: '1px solid #ccc',
borderRadius: '4px',
cursor: 'pointer'
}}
>
All Words ({pool.length})
</button>
<button
className={`toggle-btn ${!isAllView ? 'active' : ''}`}
onClick={() => setWordPoolView('candidates')}
style={{
padding: '8px 16px',
backgroundColor: !isAllView ? '#4CAF50' : '#f0f0f0',
color: !isAllView ? 'white' : '#333',
border: '1px solid #ccc',
borderRadius: '4px',
cursor: 'pointer'
}}
>
With Clues ({candidates.length})
</button>
</div>
<p>
{isAllView
? 'All words generated thematically. Click column headers to sort.'
: 'Words that passed filtering and got clues generated.'
}
</p>
{isAllView ? renderSortableThematicPool() : renderWordTable(candidates, true)}
</div>
);
};
const renderSelection = () => (
<div className="debug-section">
<h3>Selection Process</h3>
<div className="debug-grid">
<div><strong>Algorithm:</strong> {debugData.selection_method}</div>
<div><strong>Temperature:</strong> {debugData.selection_params.similarity_temperature} (lower = more deterministic)</div>
<div><strong>Difficulty Weight:</strong> {debugData.selection_params.difficulty_weight} (balance between similarity and frequency)</div>
</div>
<h4>How it works:</h4>
<ul>
<li><strong>Composite Score</strong> = (1 - difficulty_weight) Γ— similarity + difficulty_weight Γ— frequency_alignment</li>
<li><strong>Frequency Alignment</strong>: Gaussian distribution favoring target percentiles by difficulty</li>
<li><strong>Softmax Selection</strong>: Probabilistic selection based on composite scores with temperature control</li>
{debugData.generation_params.multi_topic_method && (
<li><strong>Multi-Topic Similarity:</strong> Uses {debugData.generation_params.multi_topic_method} method to find words relevant to ALL topics</li>
)}
</ul>
<h4>Difficulty Targets:</h4>
<ul>
<li><strong>Easy:</strong> 90th percentile (common words like CAT, DOG)</li>
<li><strong>Medium:</strong> 50th percentile (balanced selection)</li>
<li><strong>Hard:</strong> 20th percentile (rare words like QUETZAL, PLATYPUS)</li>
</ul>
</div>
);
const renderProbabilities = () => {
const probData = debugData.probability_distribution;
if (!probData || !probData.probabilities) {
return (
<div className="debug-section">
<h3>Probability Distribution</h3>
<p>Probability data not available (only shown with softmax selection).</p>
</div>
);
}
try {
const probabilities = probData.probabilities;
// Sort by percentile (descending) to show 100% -> 0% left to right
const sortedByPercentile = [...probabilities].sort((a, b) => b.percentile - a.percentile);
// Calculate distribution statistics based on position in sorted array
const mean = sortedByPercentile.reduce((sum, p, i) => sum + (p.probability || 0) * i, 0);
const variance = sortedByPercentile.reduce((sum, p, i) => sum + (p.probability || 0) * Math.pow(i - mean, 2), 0);
const sigma = Math.sqrt(Math.max(0, variance)); // Ensure no negative variance
const meanWordIndex = Math.max(0, Math.min(sortedByPercentile.length - 1, Math.round(mean)));
const sigmaRangeStart = Math.max(0, Math.round(mean - sigma));
const sigmaRangeEnd = Math.min(sortedByPercentile.length - 1, Math.round(mean + sigma));
// Calculate sampling statistics with bounds checking
const sigmaRangeProbMass = sortedByPercentile
.slice(sigmaRangeStart, sigmaRangeEnd + 1)
.reduce((sum, p) => sum + (p.probability || 0), 0);
// Prepare chart data - sorted by percentile to reveal Gaussian targeting
const chartData = {
labels: sortedByPercentile.map(p => `${p.word}\n(${(p.percentile * 100).toFixed(0)}%)`),
datasets: [
{
label: 'Selection Probability (%)',
data: sortedByPercentile.map(p => p.probability * 100),
backgroundColor: sortedByPercentile.map(p =>
p.selected ? 'rgba(76, 175, 80, 0.8)' : 'rgba(158, 158, 158, 0.6)'
),
borderColor: sortedByPercentile.map(p =>
p.selected ? 'rgba(76, 175, 80, 1)' : 'rgba(158, 158, 158, 0.8)'
),
borderWidth: 2
}
]
};
const chartOptions = {
responsive: true,
maintainAspectRatio: false,
plugins: {
legend: {
display: false
},
title: {
display: true,
text: `Probability Distribution by Frequency Percentile (Temperature: ${probData.temperature})`,
font: {
size: 16,
weight: 'bold'
}
},
tooltip: {
callbacks: {
title: function(context) {
const item = sortedByPercentile[context[0].dataIndex];
return `${item.word} ${item.selected ? 'βœ“ SELECTED' : ''}`;
},
label: function(context) {
const item = sortedByPercentile[context.dataIndex];
const labels = [
`Probability: ${(item.probability * 100).toFixed(2)}%`,
`Composite Score: ${item.composite_score.toFixed(3)}`,
`Similarity: ${item.similarity.toFixed(3)}`,
`Percentile: ${(item.percentile * 100).toFixed(1)}%`,
`Tier: ${item.tier.replace('tier_', '').replace('_', ' ')}`
];
// Add normalization data if available
if (item.normalization_applied && item.original_composite_score !== undefined) {
labels.splice(2, 0, `Original Score: ${item.original_composite_score.toFixed(3)}`);
labels.splice(3, 0, `🎯 Normalized: ${item.normalization_method}`);
}
return labels;
}
},
backgroundColor: 'rgba(0, 0, 0, 0.8)',
titleColor: 'white',
bodyColor: 'white',
borderColor: 'rgba(255, 255, 255, 0.3)',
borderWidth: 1
}
},
scales: {
x: {
title: {
display: true,
text: 'Words (sorted by frequency percentile: 100% β†’ 0%)',
font: {
size: 14,
weight: 'bold'
}
},
ticks: {
maxRotation: 45,
minRotation: 45,
font: {
size: 11,
weight: 'bold'
}
}
},
y: {
title: {
display: true,
text: 'Selection Probability (%)',
font: {
size: 14,
weight: 'bold'
}
},
beginAtZero: true,
ticks: {
callback: function(value) {
return value.toFixed(1) + '%';
}
}
}
},
interaction: {
intersect: false,
mode: 'index'
}
};
// Configure all plugins including annotation
const chartOptionsWithAnnotations = {
...chartOptions,
plugins: {
legend: {
display: false
},
title: {
display: true,
text: `Probability Distribution by Frequency Percentile (Temperature: ${probData.temperature})`,
font: {
size: 16,
weight: 'bold'
}
},
tooltip: {
callbacks: {
title: function(context) {
const item = sortedByPercentile[context[0].dataIndex];
return `${item.word} ${item.selected ? 'βœ“ SELECTED' : ''}`;
},
label: function(context) {
const item = sortedByPercentile[context.dataIndex];
const labels = [
`Probability: ${(item.probability * 100).toFixed(2)}%`,
`Composite Score: ${item.composite_score.toFixed(3)}`,
`Similarity: ${item.similarity.toFixed(3)}`,
`Percentile: ${(item.percentile * 100).toFixed(1)}%`,
`Tier: ${item.tier.replace('tier_', '').replace('_', ' ')}`
];
// Add normalization data if available
if (item.normalization_applied && item.original_composite_score !== undefined) {
labels.splice(2, 0, `Original Score: ${item.original_composite_score.toFixed(3)}`);
labels.splice(3, 0, `🎯 Normalized: ${item.normalization_method}`);
}
return labels;
}
},
backgroundColor: 'rgba(0, 0, 0, 0.8)',
titleColor: 'white',
bodyColor: 'white',
borderColor: 'rgba(255, 255, 255, 0.3)',
borderWidth: 1
},
annotation: {
annotations: {
meanLine: {
type: 'line',
xMin: meanWordIndex,
xMax: meanWordIndex,
borderColor: 'rgba(255, 99, 132, 0.8)',
borderWidth: 3,
borderDash: [5, 5],
label: {
display: true,
content: 'ΞΌ',
position: 'start',
backgroundColor: 'rgba(255, 99, 132, 0.8)',
color: 'white',
font: {
weight: 'bold',
size: 12
}
}
},
sigmaBox: {
type: 'box',
xMin: sigmaRangeStart,
xMax: sigmaRangeEnd,
backgroundColor: 'rgba(54, 162, 235, 0.15)',
borderColor: 'rgba(54, 162, 235, 0.5)',
borderWidth: 2,
label: {
display: true,
content: `Οƒ (${(sigmaRangeProbMass * 100).toFixed(1)}%)`,
position: 'center',
backgroundColor: 'rgba(54, 162, 235, 0.8)',
color: 'white',
font: {
weight: 'bold',
size: 11
}
}
},
sigmaStartLine: {
type: 'line',
xMin: sigmaRangeStart,
xMax: sigmaRangeStart,
borderColor: 'rgba(54, 162, 235, 0.8)',
borderWidth: 2,
borderDash: [3, 3],
label: {
display: true,
content: 'ΞΌ-Οƒ',
position: 'start',
backgroundColor: 'rgba(54, 162, 235, 0.6)',
color: 'white',
font: {
size: 10
}
}
},
sigmaEndLine: {
type: 'line',
xMin: sigmaRangeEnd,
xMax: sigmaRangeEnd,
borderColor: 'rgba(54, 162, 235, 0.8)',
borderWidth: 2,
borderDash: [3, 3],
label: {
display: true,
content: 'ΞΌ+Οƒ',
position: 'start',
backgroundColor: 'rgba(54, 162, 235, 0.6)',
color: 'white',
font: {
size: 10
}
}
}
}
}
}
};
return (
<div className="debug-section">
<h3>Probability Distribution ({probData.total_candidates} candidates)</h3>
<p>Selection probabilities from softmax algorithm (temperature: {probData.temperature}, difficulty: {probData.difficulty})</p>
<div className="prob-summary">
<div><strong>Selected:</strong> {probData.selected_count} words</div>
<div><strong>Top Probability:</strong> {(Math.max(...sortedByPercentile.map(p => p.probability)) * 100).toFixed(1)}%</div>
<div><strong>Average:</strong> {((1/probData.total_candidates) * 100).toFixed(1)}%</div>
<div><strong>Temperature Effect:</strong> {probData.temperature < 1 ? 'More deterministic' : probData.temperature > 1 ? 'More random' : 'Balanced'}</div>
{probData.normalization_enabled && (
<div style={{backgroundColor: '#e8f5e8', padding: '4px', borderRadius: '4px'}}>
<strong>🎯 Distribution Normalization:</strong> ENABLED ({probData.normalization_method})
</div>
)}
<div><strong>Mean Position:</strong> Word #{meanWordIndex + 1} ({sortedByPercentile[meanWordIndex]?.word})</div>
<div><strong>Distribution Width (Οƒ):</strong> {sigma.toFixed(1)} words</div>
<div><strong>Οƒ Sampling Zone:</strong> {(sigmaRangeProbMass * 100).toFixed(1)}% of probability mass</div>
<div><strong>Οƒ Range:</strong> Words #{sigmaRangeStart + 1}-#{sigmaRangeEnd + 1}</div>
</div>
{/* Interactive Bar Chart */}
<div className="chart-container">
<div style={{ height: '500px', marginBottom: '20px' }}>
<Bar data={chartData} options={chartOptionsWithAnnotations} />
</div>
<p className="chart-description">
<strong>πŸ“Š Frequency-Based Analysis:</strong> This chart shows ALL {probData.total_candidates} candidate words sorted by
frequency percentile (100% β†’ 0%, common β†’ rare). This reveals whether the Gaussian frequency targeting
is working correctly for your selected difficulty level. Look for probability peaks at the intended percentile ranges:
<strong> Easy (90%+), Medium (50%), Hard (20%)</strong>.
{probData.normalization_enabled && (
<> <strong>🎯 Distribution normalization is ENABLED</strong> to ensure consistent difficulty across topics.</>
)}
</p>
</div>
{/* Detailed Table */}
<h4>Detailed Probability Data</h4>
<div className="probability-table-container">
<table className="probability-table">
<thead>
<tr>
<th>Rank</th>
<th>Word</th>
<th>Probability</th>
<th>Composite</th>
<th>Similarity</th>
<th>Percentile</th>
<th>Selected</th>
</tr>
</thead>
<tbody>
{sortedByPercentile.map((item, idx) => (
<tr key={idx} className={item.selected ? 'selected-word' : ''}>
<td>{item.probability_rank}</td>
<td><strong>{item.word}</strong></td>
<td>
<div className="probability-cell">
<span className="prob-text">{(item.probability * 100).toFixed(2)}%</span>
<div
className="prob-bar"
style={{
width: `${Math.max(2, item.probability * 100 * 2)}px`,
backgroundColor: item.selected ? '#4CAF50' : '#e0e0e0'
}}
/>
</div>
</td>
<td>{item.composite_score.toFixed(3)}</td>
<td>{item.similarity.toFixed(3)}</td>
<td>{item.percentile.toFixed(3)}</td>
<td>{item.selected ? 'βœ“' : 'βœ—'}</td>
</tr>
))}
</tbody>
</table>
</div>
</div>
);
} catch (error) {
console.error('Error rendering probabilities:', error);
return (
<div className="debug-section">
<h3>Probability Distribution</h3>
<p style={{color: 'red'}}>Error rendering chart: {error.message}</p>
<p>Debug data available: {JSON.stringify(Object.keys(probData || {}))}</p>
</div>
);
}
};
const renderSelected = () => {
const selected = debugData.selected_words || [];
return (
<div className="debug-section">
<h3>Selected Words ({selected.length} words)</h3>
<p>Final words chosen for crossword generation.</p>
{renderWordTable(selected, true)}
</div>
);
};
// Maps the active section id to its renderer. 'overview' and any
// unrecognised id both fall through to the overview.
const renderSection = () => {
  if (activeSection === 'word-pool') {
    return renderWordPool();
  }
  if (activeSection === 'probability-analysis') {
    return renderProbabilities();
  }
  if (activeSection === 'final-selection') {
    return renderSelected();
  }
  return renderOverview();
};
// Layout: one navigation button per entry in `sections` (the active one gets
// the 'active' class), followed by the content of the active section.
return (
<div className="debug-tab">
<div className="debug-nav">
{sections.map(section => (
<button
key={section.id}
className={`debug-nav-btn ${activeSection === section.id ? 'active' : ''}`}
onClick={() => setActiveSection(section.id)}
>
{section.label}
</button>
))}
</div>
<div className="debug-content">
{renderSection()}
</div>
</div>
);
};
export default DebugTab;