jdosjcd
/

embedding_checkpoint

Model card Files Files and versions

embedding_checkpoint / 0331_1923_checkpoint_step_800_epoch_1 /config.json

jdosjcd's picture

checkpoint step 800 epoch 1

293b301 verified 17 days ago

history blame contribute delete

3.02 kB

	{
	"epoch": 1,
	"global_step": 800,
	"loss": 0.1107650026679039,
	"training_config": {
	"llm_backbone": "dasheng",
	"qwen3_name": "Qwen/Qwen3-Embedding-0.6B",
	"dasheng_name": "mispeech/midashenglm-7b-0804-fp32",
	"dasheng_path": "/workspace/cache/huggingface/dasheng_lm",
	"trainable_modules": [
	"backbone",
	"dasheng",
	"dasheng_down",
	"dasheng_proj",
	"siglip_head"
	],
	"use_lora": true,
	"lora_r": 16,
	"lora_alpha": 32,
	"lora_dropout": 0.05,
	"lora_target_modules": [
	"q_proj",
	"k_proj",
	"v_proj"
	],
	"use_dasheng_lora": false,
	"dasheng_lora_r": 8,
	"dasheng_lora_alpha": 16,
	"dasheng_lora_dropout": 0.1,
	"dasheng_lora_target_modules": null,
	"train_layer_ratio": 1.0,
	"train_layer_strategy": "last_n",
	"output_dim": null,
	"output_identity": false,
	"use_logit_scale": true,
	"loss_type": "infonce",
	"use_checkpointing": true,
	"checkpoint_reentrant": false,
	"gather_negatives": true,
	"use_loss_mask": true,
	"duplicate_doc_threshold": 0.999,
	"duplicate_query_threshold": 0.999,
	"hard_negative_margin": 0.1,
	"add_speaker_mask": false,
	"s3_base_path": "https://d2j287p0ytux1o.cloudfront.net",
	"dataset_config": "/workspace/SpeechRAG_exp/dataset_configs/setting_multi_task.json",
	"aws_profile": "test_user",
	"cache_dir": "/workspace/cache/huggingface",
	"enable_audio_cache": true,
	"audio_cache_dir": "/workspace/cache/huggingface/audio_cache",
	"target_sr": 16000,
	"mono": true,
	"max_query_audio_length": 45.0,
	"max_doc_audio_length": 45.0,
	"max_query_text_length": 1000,
	"max_doc_text_length": 1000,
	"eval_max_query_audio_length": null,
	"eval_max_doc_audio_length": null,
	"eval_max_query_text_length": null,
	"eval_max_doc_text_length": null,
	"batch_size": 64,
	"num_epochs": 10,
	"learning_rate": 0.0001,
	"gradient_accumulation_steps": 2,
	"use_grad_cache": true,
	"gc_query_chunk_size": 8,
	"gc_doc_chunk_size": 8,
	"gc_no_sync_except_last": true,
	"ddp_find_unused_parameters": false,
	"weight_decay": 0.001,
	"optimizer_bits": "default",
	"num_workers": 16,
	"train_batch_task_mode": "single_task",
	"task_batch_ratio": "{\"semantic\": 0.5, \"cross\": 0.5}",
	"save_dir": "checkpoints",
	"save_steps": 100,
	"keep_checkpoints": 1,
	"upload_steps": 100,
	"upload_repo_id": "jdosjcd/embedding_checkpoint",
	"log_dir": "logs",
	"log_steps": 5,
	"mixed_precision": "bf16",
	"use_deepspeed": false,
	"deepspeed_config": null,
	"use_fsdp": false,
	"fsdp_config": null,
	"use_ema": false,
	"ema_decay": 0.9999,
	"ema_update_after": 0,
	"ema_update_every": 1,
	"scheduler_type": "warmup_cosine_decay",
	"warmup_steps": 500,
	"warmup_ratio": 0.1,
	"min_lr": 1e-05,
	"eval_steps": 100,
	"eval_batch_size": 32,
	"resume_from": null,
	"test_mode": false,
	"mock_dataset_size": 100
	}
	}