{ "dataset": "/root/lang_mix.jsonl", "tokenizer": "/root/tokenizers", "out_dir": "/root/models/run_cen_V", "use_hf": false, "hf_name": null, "hf_subset": null, "hf_split": "train", "hf_streaming": true, "hf_text_field": "text", "hf_messages_field": "messages", "shuffle_buffer_size": 10000, "seed": 1337, "init_from_checkpoint": null, "finetune_from": null, "strict_vocab_match": false, "save_tag": null, "save_every_n": 2000, "keep_last_k": 3, "save_on_improve": false, "improve_delta": 0.0, "resume_from_checkpoint": null, "log_loss_to_csv": false, "dataset_text_field": "text", "min_sample_token_length": 8, "stream_local_dataset": false, "local_dataset_shuffle_buffer": 2048, "block_count_sample_fraction": 0.02, "block_count_min_sample_megabytes": 32, "block_count_max_sample_megabytes": 512, "precision": "bf16", "optimizer": "adamw", "lr": 0.0003, "weight_decay": 0.06, "beta2": 0.98, "adam_eps": 1e-08, "grad_clip": 0.8, "optim_eps": 1e-08, "lr_scheduler": "cosine", "min_lr_ratio": 0.05, "muon_lr": null, "muon_momentum": 0.95, "muon_exclude_embeddings": true, "muon_beta1": null, "muon_beta2": null, "muon_eps": null, "muon_bias_correction": true, "muon_clip_by_layer": false, "muon_lr_correction": true, "batch_size": 4, "accum_steps": 16, "epochs": 2, "warmup_frac": 0.05, "log_every_n": 10, "overfit_subset": null, "use_gradient_checkpoint": false, "num_workers": 2, "pin_memory": true, "persistent_workers": true, "sft_mode": "lora", "lora_r": 64, "lora_alpha": 96, "lora_dropout": 0.05, "include_agent_end": true, "include_eos": false, "mask_user_queries": true }