| { |
| "epoch": 1, |
| "global_step": 800, |
| "loss": 0.1107650026679039, |
| "training_config": { |
| "llm_backbone": "dasheng", |
| "qwen3_name": "Qwen/Qwen3-Embedding-0.6B", |
| "dasheng_name": "mispeech/midashenglm-7b-0804-fp32", |
| "dasheng_path": "/workspace/cache/huggingface/dasheng_lm", |
| "trainable_modules": [ |
| "backbone", |
| "dasheng", |
| "dasheng_down", |
| "dasheng_proj", |
| "siglip_head" |
| ], |
| "use_lora": true, |
| "lora_r": 16, |
| "lora_alpha": 32, |
| "lora_dropout": 0.05, |
| "lora_target_modules": [ |
| "q_proj", |
| "k_proj", |
| "v_proj" |
| ], |
| "use_dasheng_lora": false, |
| "dasheng_lora_r": 8, |
| "dasheng_lora_alpha": 16, |
| "dasheng_lora_dropout": 0.1, |
| "dasheng_lora_target_modules": null, |
| "train_layer_ratio": 1.0, |
| "train_layer_strategy": "last_n", |
| "output_dim": null, |
| "output_identity": false, |
| "use_logit_scale": true, |
| "loss_type": "infonce", |
| "use_checkpointing": true, |
| "checkpoint_reentrant": false, |
| "gather_negatives": true, |
| "use_loss_mask": true, |
| "duplicate_doc_threshold": 0.999, |
| "duplicate_query_threshold": 0.999, |
| "hard_negative_margin": 0.1, |
| "add_speaker_mask": false, |
| "s3_base_path": "https://d2j287p0ytux1o.cloudfront.net", |
| "dataset_config": "/workspace/SpeechRAG_exp/dataset_configs/setting_multi_task.json", |
| "aws_profile": "test_user", |
| "cache_dir": "/workspace/cache/huggingface", |
| "enable_audio_cache": true, |
| "audio_cache_dir": "/workspace/cache/huggingface/audio_cache", |
| "target_sr": 16000, |
| "mono": true, |
| "max_query_audio_length": 45.0, |
| "max_doc_audio_length": 45.0, |
| "max_query_text_length": 1000, |
| "max_doc_text_length": 1000, |
| "eval_max_query_audio_length": null, |
| "eval_max_doc_audio_length": null, |
| "eval_max_query_text_length": null, |
| "eval_max_doc_text_length": null, |
| "batch_size": 64, |
| "num_epochs": 10, |
| "learning_rate": 0.0001, |
| "gradient_accumulation_steps": 2, |
| "use_grad_cache": true, |
| "gc_query_chunk_size": 8, |
| "gc_doc_chunk_size": 8, |
| "gc_no_sync_except_last": true, |
| "ddp_find_unused_parameters": false, |
| "weight_decay": 0.001, |
| "optimizer_bits": "default", |
| "num_workers": 16, |
| "train_batch_task_mode": "single_task", |
| "task_batch_ratio": "{\"semantic\": 0.5, \"cross\": 0.5}", |
| "save_dir": "checkpoints", |
| "save_steps": 100, |
| "keep_checkpoints": 1, |
| "upload_steps": 100, |
| "upload_repo_id": "jdosjcd/embedding_checkpoint", |
| "log_dir": "logs", |
| "log_steps": 5, |
| "mixed_precision": "bf16", |
| "use_deepspeed": false, |
| "deepspeed_config": null, |
| "use_fsdp": false, |
| "fsdp_config": null, |
| "use_ema": false, |
| "ema_decay": 0.9999, |
| "ema_update_after": 0, |
| "ema_update_every": 1, |
| "scheduler_type": "warmup_cosine_decay", |
| "warmup_steps": 500, |
| "warmup_ratio": 0.1, |
| "min_lr": 1e-05, |
| "eval_steps": 100, |
| "eval_batch_size": 32, |
| "resume_from": null, |
| "test_mode": false, |
| "mock_dataset_size": 100 |
| } |
| } |