Commit
·
80cfd1e
1
Parent(s):
62cd7ca
Update Space to work with modernized dataset format
Browse files

- Load dataset using new JSONL format (split="train")
- Reconstruct nested data structure from flattened records
- Add modality key mapping for consistent naming
- Parse JSON strings for characteristics and relationships
This update makes the Space compatible with the migrated dataset
that no longer uses a Python loading script.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
app.py
CHANGED
|
@@ -4,26 +4,68 @@ import json
|
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
# Load the dataset
|
| 7 |
-
dataset = load_dataset("danielrosehill/multimodal-ai-taxonomy")
|
| 8 |
|
| 9 |
-
# Extract taxonomy data
|
| 10 |
taxonomy_data = {}
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
# Define modality display names and emojis
|
| 29 |
MODALITY_INFO = {
|
|
|
|
import pandas as pd

# Load the dataset (modernized JSONL format publishes a single "train" split).
dataset = load_dataset("danielrosehill/multimodal-ai-taxonomy", split="train")

# Map flat `output_modality` values onto the keys used in MODALITY_INFO.
# Hoisted to module level: the mapping is loop-invariant, so there is no
# reason to rebuild the dict literal for every record.
_MODALITY_KEY_MAP = {
    "video": "video_generation",
    "audio": "audio_generation",
    "image": "image_generation",
    "text": "text_generation",
    "3d": "3d_generation",
    "3d-model": "3d_generation",
}


def _reconstruct_taxonomy(records):
    """Rebuild the nested taxonomy structure from flattened dataset records.

    Each record is a flat dict produced by the dataset migration. Records are
    regrouped as::

        {modality_key: {operation_type: {"description": ..., "modalities": [...]}}}

    Args:
        records: iterable of flat record dicts (one per modality). Expected
            keys include 'output_modality', 'operation_type', 'id', 'name',
            the 'input_*'/'output_*'/'metadata_*' columns, and the JSON-string
            columns 'characteristics' and 'relationships'.

    Returns:
        dict: the nested taxonomy, keyed by modality key then operation type.

    Raises:
        KeyError: if a record is missing one of the required flat columns.
        json.JSONDecodeError: if 'characteristics' or 'relationships' holds
            a non-empty string that is not valid JSON.
    """
    taxonomy = {}
    for record in records:
        output_modality = record['output_modality']
        operation_type = record['operation_type']

        # Unknown modalities fall back to "<modality>_generation" so new
        # dataset values do not crash the reconstruction.
        modality_key = _MODALITY_KEY_MAP.get(
            output_modality, f"{output_modality}_generation"
        )

        # Initialize the nested structure on first sight of each key pair.
        group = taxonomy.setdefault(modality_key, {})
        if operation_type not in group:
            group[operation_type] = {
                "description": f"{output_modality.title()} {operation_type} modalities",
                "outputModality": output_modality,
                "operationType": operation_type,
                "modalities": [],
            }

        # Reconstruct the nested modality object from the flat columns.
        # 'characteristics' and 'relationships' are stored as JSON strings;
        # empty or null values become empty dicts.
        modality_obj = {
            "id": record['id'],
            "name": record['name'],
            "input": {
                "primary": record['input_primary'],
                "secondary": record['input_secondary'],
            },
            "output": {
                "primary": record['output_primary'],
                "audio": record['output_audio'],
            },
            "characteristics": json.loads(record['characteristics']) if record['characteristics'] else {},
            "metadata": {
                "maturityLevel": record['metadata_maturity_level'],
                "commonUseCases": record['metadata_common_use_cases'],
                "platforms": record['metadata_platforms'],
                "exampleModels": record['metadata_example_models'],
            },
            "relationships": json.loads(record['relationships']) if record['relationships'] else {},
        }

        # Only emit "audioType" when the record actually carries audio and
        # declares a type ('output_audio_type' may be absent entirely).
        if record['output_audio'] and record.get('output_audio_type'):
            modality_obj["output"]["audioType"] = record['output_audio_type']

        group[operation_type]["modalities"].append(modality_obj)

    return taxonomy


# Extract taxonomy data and reconstruct the nested structure.
taxonomy_data = _reconstruct_taxonomy(dataset)
| 69 |
|
| 70 |
# Define modality display names and emojis
|
| 71 |
MODALITY_INFO = {
|