{
  "model_class": "GLMModel",
  "tokenizer_type": "glm_ChineseSPTokenizer",
  "num_layers": 48,
  "hidden_size": 4096,
  "num_attention_heads": 64,
  "vocab_size": 50048,
  "hidden_dropout": 0.1,
  "attention_dropout": 0.1,
  "layernorm_order": "pre",
  "model_parallel_size": 1,
  "max_sequence_length": 1025,
  "block_lm": true,
  "masked_lm": false,
  "bert_prob": 0.5,
  "gpt_infill_prob": 0.5,
  "gpt_min_ratio": 0.5,
  "gap_sentence_prob": 0.0,
  "gap_sentence_ratio": 0.15,
  "avg_block_length": 3,
  "short_seq_prob": 0.0,
  "single_span_prob": 0.0,
  "task_mask": true,
  "no_shuffle_block": false,
  "no_block_position": false,
  "sentinel_token": false,
  "block_mask_prob": 0.0,
  "context_mask_ratio": 0.0,
  "random_position": false,
  "cloze_eval": true,
  "old_checkpoint": false,
  "tokenizer_model_type": "glm-10b"
}