danielrosehill Claude committed on
Commit
80cfd1e
·
1 Parent(s): 62cd7ca

Update Space to work with modernized dataset format

Browse files

- Load dataset using new JSONL format (split="train")
- Reconstruct nested data structure from flattened records
- Add modality key mapping for consistent naming
- Parse JSON strings for characteristics and relationships

This update makes the Space compatible with the migrated dataset
that no longer uses a Python loading script.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +60 -18
app.py CHANGED
@@ -4,26 +4,68 @@ import json
4
  import pandas as pd
5
 
6
  # Load the dataset
7
- dataset = load_dataset("danielrosehill/multimodal-ai-taxonomy")
8
 
9
- # Extract taxonomy data
10
  taxonomy_data = {}
11
- for split_name in dataset.keys():
12
- if split_name.startswith("taxonomy_"):
13
- # Parse the split name to get modality and operation type
14
- parts = split_name.replace("taxonomy_", "").split("_")
15
- if len(parts) >= 3:
16
- modality_parts = parts[:-1]
17
- operation = parts[-1]
18
- modality = "_".join(modality_parts)
19
-
20
- if modality not in taxonomy_data:
21
- taxonomy_data[modality] = {}
22
-
23
- # Get the modalities from this split
24
- data = dataset[split_name]
25
- if len(data) > 0:
26
- taxonomy_data[modality][operation] = json.loads(data[0]['json'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  # Define modality display names and emojis
29
  MODALITY_INFO = {
 
4
  import pandas as pd
5
 
6
  # Load the dataset
7
+ dataset = load_dataset("danielrosehill/multimodal-ai-taxonomy", split="train")
8
 
9
+ # Extract taxonomy data and reconstruct nested structure
10
  taxonomy_data = {}
11
+
12
+ for record in dataset:
13
+ # Get modality info
14
+ output_modality = record['output_modality']
15
+ operation_type = record['operation_type']
16
+
17
+ # Map output_modality to the keys used in MODALITY_INFO
18
+ modality_key_map = {
19
+ "video": "video_generation",
20
+ "audio": "audio_generation",
21
+ "image": "image_generation",
22
+ "text": "text_generation",
23
+ "3d": "3d_generation",
24
+ "3d-model": "3d_generation"
25
+ }
26
+
27
+ modality_key = modality_key_map.get(output_modality, f"{output_modality}_generation")
28
+
29
+ # Initialize nested structure
30
+ if modality_key not in taxonomy_data:
31
+ taxonomy_data[modality_key] = {}
32
+
33
+ if operation_type not in taxonomy_data[modality_key]:
34
+ taxonomy_data[modality_key][operation_type] = {
35
+ "description": f"{output_modality.title()} {operation_type} modalities",
36
+ "outputModality": output_modality,
37
+ "operationType": operation_type,
38
+ "modalities": []
39
+ }
40
+
41
+ # Reconstruct the nested modality object
42
+ modality_obj = {
43
+ "id": record['id'],
44
+ "name": record['name'],
45
+ "input": {
46
+ "primary": record['input_primary'],
47
+ "secondary": record['input_secondary']
48
+ },
49
+ "output": {
50
+ "primary": record['output_primary'],
51
+ "audio": record['output_audio']
52
+ },
53
+ "characteristics": json.loads(record['characteristics']) if record['characteristics'] else {},
54
+ "metadata": {
55
+ "maturityLevel": record['metadata_maturity_level'],
56
+ "commonUseCases": record['metadata_common_use_cases'],
57
+ "platforms": record['metadata_platforms'],
58
+ "exampleModels": record['metadata_example_models']
59
+ },
60
+ "relationships": json.loads(record['relationships']) if record['relationships'] else {}
61
+ }
62
+
63
+ # Add audio type if present
64
+ if record['output_audio'] and record.get('output_audio_type'):
65
+ modality_obj["output"]["audioType"] = record['output_audio_type']
66
+
67
+ # Add to taxonomy data
68
+ taxonomy_data[modality_key][operation_type]["modalities"].append(modality_obj)
69
 
70
  # Define modality display names and emojis
71
  MODALITY_INFO = {