jdosjcd's picture
checkpoint step 800 epoch 1
293b301 verified
{
"epoch": 1,
"global_step": 800,
"loss": 0.1107650026679039,
"training_config": {
"llm_backbone": "dasheng",
"qwen3_name": "Qwen/Qwen3-Embedding-0.6B",
"dasheng_name": "mispeech/midashenglm-7b-0804-fp32",
"dasheng_path": "/workspace/cache/huggingface/dasheng_lm",
"trainable_modules": [
"backbone",
"dasheng",
"dasheng_down",
"dasheng_proj",
"siglip_head"
],
"use_lora": true,
"lora_r": 16,
"lora_alpha": 32,
"lora_dropout": 0.05,
"lora_target_modules": [
"q_proj",
"k_proj",
"v_proj"
],
"use_dasheng_lora": false,
"dasheng_lora_r": 8,
"dasheng_lora_alpha": 16,
"dasheng_lora_dropout": 0.1,
"dasheng_lora_target_modules": null,
"train_layer_ratio": 1.0,
"train_layer_strategy": "last_n",
"output_dim": null,
"output_identity": false,
"use_logit_scale": true,
"loss_type": "infonce",
"use_checkpointing": true,
"checkpoint_reentrant": false,
"gather_negatives": true,
"use_loss_mask": true,
"duplicate_doc_threshold": 0.999,
"duplicate_query_threshold": 0.999,
"hard_negative_margin": 0.1,
"add_speaker_mask": false,
"s3_base_path": "https://d2j287p0ytux1o.cloudfront.net",
"dataset_config": "/workspace/SpeechRAG_exp/dataset_configs/setting_multi_task.json",
"aws_profile": "test_user",
"cache_dir": "/workspace/cache/huggingface",
"enable_audio_cache": true,
"audio_cache_dir": "/workspace/cache/huggingface/audio_cache",
"target_sr": 16000,
"mono": true,
"max_query_audio_length": 45.0,
"max_doc_audio_length": 45.0,
"max_query_text_length": 1000,
"max_doc_text_length": 1000,
"eval_max_query_audio_length": null,
"eval_max_doc_audio_length": null,
"eval_max_query_text_length": null,
"eval_max_doc_text_length": null,
"batch_size": 64,
"num_epochs": 10,
"learning_rate": 0.0001,
"gradient_accumulation_steps": 2,
"use_grad_cache": true,
"gc_query_chunk_size": 8,
"gc_doc_chunk_size": 8,
"gc_no_sync_except_last": true,
"ddp_find_unused_parameters": false,
"weight_decay": 0.001,
"optimizer_bits": "default",
"num_workers": 16,
"train_batch_task_mode": "single_task",
"task_batch_ratio": "{\"semantic\": 0.5, \"cross\": 0.5}",
"save_dir": "checkpoints",
"save_steps": 100,
"keep_checkpoints": 1,
"upload_steps": 100,
"upload_repo_id": "jdosjcd/embedding_checkpoint",
"log_dir": "logs",
"log_steps": 5,
"mixed_precision": "bf16",
"use_deepspeed": false,
"deepspeed_config": null,
"use_fsdp": false,
"fsdp_config": null,
"use_ema": false,
"ema_decay": 0.9999,
"ema_update_after": 0,
"ema_update_every": 1,
"scheduler_type": "warmup_cosine_decay",
"warmup_steps": 500,
"warmup_ratio": 0.1,
"min_lr": 1e-05,
"eval_steps": 100,
"eval_batch_size": 32,
"resume_from": null,
"test_mode": false,
"mock_dataset_size": 100
}
}