import gradio as gr
from datasets import load_dataset
import json
import pandas as pd
# Load the dataset
dataset = load_dataset("danielrosehill/multimodal-ai-taxonomy", split="train")
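# Each record is one flat row; the columns referenced below are assumed to look
# roughly like this (illustrative values only, not taken from the dataset):
# {"id": "text-to-video", "name": "Text to Video", "output_modality": "video",
#  "operation_type": "creation", "input_primary": "text", "input_secondary": [...],
#  "characteristics": "{...json...}", "relationships": "{...json...}", ...}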
# Extract taxonomy data and reconstruct nested structure
taxonomy_data = {}
# Map output_modality to the keys used in MODALITY_INFO
modality_key_map = {
    "video": "video_generation",
    "audio": "audio_generation",
    "image": "image_generation",
    "text": "text_generation",
    "3d": "3d_generation",
    "3d-model": "3d_generation",
}

for record in dataset:
    # Get modality info
    output_modality = record['output_modality']
    operation_type = record['operation_type']
    modality_key = modality_key_map.get(output_modality, f"{output_modality}_generation")

    # Initialize nested structure
    if modality_key not in taxonomy_data:
        taxonomy_data[modality_key] = {}
    if operation_type not in taxonomy_data[modality_key]:
        taxonomy_data[modality_key][operation_type] = {
            "description": f"{output_modality.title()} {operation_type} modalities",
            "outputModality": output_modality,
            "operationType": operation_type,
            "modalities": []
        }

    # Reconstruct the nested modality object
    modality_obj = {
        "id": record['id'],
        "name": record['name'],
        "input": {
            "primary": record['input_primary'],
            "secondary": record['input_secondary']
        },
        "output": {
            "primary": record['output_primary'],
            "audio": record['output_audio']
        },
        "characteristics": json.loads(record['characteristics']) if record['characteristics'] else {},
        "metadata": {
            "maturityLevel": record['metadata_maturity_level'],
            "commonUseCases": record['metadata_common_use_cases'],
            "platforms": record['metadata_platforms'],
            "exampleModels": record['metadata_example_models']
        },
        "relationships": json.loads(record['relationships']) if record['relationships'] else {}
    }

    # Add audio type if present
    if record['output_audio'] and record.get('output_audio_type'):
        modality_obj["output"]["audioType"] = record['output_audio_type']

    # Add to taxonomy data
    taxonomy_data[modality_key][operation_type]["modalities"].append(modality_obj)
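# Resulting shape (illustrative; "creation"/"editing" assume the operation_type
# values described in the intro text further down):
#   taxonomy_data["video_generation"]["creation"]["modalities"] -> [ {...}, ... ]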
# Define modality display names
MODALITY_INFO = {
    "video_generation": {"name": "Video Generation", "color": "#FF6B6B"},
    "audio_generation": {"name": "Audio Generation", "color": "#4ECDC4"},
    "image_generation": {"name": "Image Generation", "color": "#95E1D3"},
    "text_generation": {"name": "Text Generation", "color": "#F38181"},
    "3d_generation": {"name": "3D Generation", "color": "#AA96DA"},
}
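# The .index-card / .stat-box styles below suggest an index page showing
# per-modality counts. A minimal sketch of computing them (helper name is an
# assumption, not part of the original app):
def modality_counts():
    """Total number of modality entries per output modality."""
    return {
        key: sum(len(op["modalities"]) for op in ops.values())
        for key, ops in taxonomy_data.items()
    }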
# CSS for styling
custom_css = """
.modality-card {
border: 2px solid #e0e0e0;
border-radius: 10px;
padding: 20px;
margin: 10px 0;
background: white;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.modality-header {
font-size: 1.5em;
font-weight: bold;
margin-bottom: 10px;
color: #333;
}
.modality-meta {
background: #f5f5f5;
padding: 10px;
border-radius: 5px;
margin: 10px 0;
}
.badge {
display: inline-block;
padding: 4px 12px;
border-radius: 12px;
margin: 2px;
font-size: 0.85em;
font-weight: 500;
}
.badge-mature { background: #4CAF50; color: white; }
.badge-emerging { background: #FF9800; color: white; }
.badge-experimental { background: #9C27B0; color: white; }
.index-card {
border: 2px solid #ddd;
border-radius: 15px;
padding: 30px;
margin: 15px;
text-align: center;
cursor: pointer;
transition: all 0.3s;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
}
.index-card:hover {
transform: translateY(-5px);
box-shadow: 0 10px 20px rgba(0,0,0,0.2);
}
.stat-box {
background: #f8f9fa;
border-radius: 10px;
padding: 15px;
margin: 10px;
text-align: center;
}
"""
def create_modality_card(modality_obj):
    """Create an HTML card for a single modality"""
    # Maturity badge
    maturity = modality_obj['metadata']['maturityLevel']
    badge_class = f"badge badge-{maturity}"

    # Input/Output info
    input_primary = modality_obj['input']['primary']
    input_secondary = modality_obj['input'].get('secondary', [])
    output_primary = modality_obj['output']['primary']

    # Build input string
    input_str = f"**Primary:** {input_primary}"
    if input_secondary:
        input_str += f"\n**Secondary:** {', '.join(input_secondary)}"

    # Audio info for output
    audio_info = ""
    if modality_obj['output'].get('audio'):
        audio_type = modality_obj['output'].get('audioType', 'N/A')
        audio_info = f"\n**Audio:** {audio_type}"

    # Characteristics
    chars = modality_obj.get('characteristics', {})
    char_items = [f"**{k}:** {v}" for k, v in chars.items()]
    char_str = "\n".join(char_items) if char_items else "N/A"

    # Use cases
    use_cases = modality_obj['metadata'].get('commonUseCases', [])
    use_case_str = ("\n• " + "\n• ".join(use_cases)) if use_cases else "N/A"

    # Platforms
    platforms = modality_obj['metadata'].get('platforms', [])
    platform_str = ", ".join(platforms) if platforms else "N/A"

    # Example models
    models = modality_obj['metadata'].get('exampleModels', [])
    model_str = ", ".join(models) if models else "N/A"
html = f"""
Platforms: {platform_str}
Example Models: {model_str}
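# Example usage (keys are illustrative; "creation" assumes the operation_type
# values described in the intro text):
# card = create_modality_card(
#     taxonomy_data["video_generation"]["creation"]["modalities"][0])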
# Intro text shown at the top of the app (the constant name is an assumption;
# it is rendered via gr.Markdown in the UI sketch at the end of the file).
INTRO_MD = """
An attempt to define a structured taxonomy for multimodal generative AI capabilities,
organized by output modality and operation type.

Dataset repository: `danielrosehill/multimodal-ai-taxonomy`

Navigate through the tabs above to explore different output modalities (Video, Audio, Image, Text, 3D).
Each modality is organized into **Creation** (generating new content) and **Editing** (modifying existing content) operations.
Click on the details sections to expand and see characteristics, use cases, platforms, and example models.
"""
# Helper name and signature are assumptions inferred from the body.
def display_operation(modality_key, operation_type):
    """Build the HTML listing for one modality/operation tab"""
    if modality_key not in taxonomy_data:
        return f"No data found for {modality_key}"
    if operation_type not in taxonomy_data[modality_key]:
        return f"No {operation_type} data found for {modality_key}"
    data = taxonomy_data[modality_key][operation_type]
    modalities = data.get('modalities', [])
    info = MODALITY_INFO.get(modality_key, {"name": modality_key, "color": "#666"})
    html = f"""<h2 style="color: {info['color']}">{info['name']}</h2>
    <p>{data.get('description', '')}</p>"""
    for modality in modalities:
        html += create_modality_card(modality)
    return html