default_stage:
  default_modifiers:
    QuantizationModifier:
      config_groups:
        group_fp8:
          targets:
            - 're:.*\.linear_attn\.in_proj_qkv$'
            - 're:.*\.linear_attn\.in_proj_z$'
            - 're:.*\.linear_attn\.out_proj$'
            - 're:.*\.self_attn\.q_proj$'
            - 're:.*\.self_attn\.k_proj$'
            - 're:.*\.self_attn\.v_proj$'
            - 're:.*\.mlp\.down_proj$'
            - 're:.*\.mlp\.experts\.down_proj$'
            - 're:.*\.mlp\.shared_expert\.down_proj$'
          weights:
            num_bits: 8
            type: float
            symmetric: true
            group_size: null
            strategy: channel
            block_structure: null
            dynamic: false
            actorder: null
            scale_dtype: null
            zp_dtype: null
            observer: memoryless_minmax
            observer_kwargs: {}
          input_activations:
            num_bits: 8
            type: float
            symmetric: true
            group_size: null
            strategy: token
            block_structure: null
            dynamic: true
            actorder: null
            scale_dtype: null
            zp_dtype: null
            observer: null
            observer_kwargs: {}
          output_activations: null
          format: null
        group_nvfp4:
          targets:
            - 're:.*\.self_attn\.o_proj$'
            - 're:.*\.mlp\.gate_proj$'
            - 're:.*\.mlp\.up_proj$'
            - 're:.*\.mlp\.experts\.gate_up_proj$'
            - 're:.*\.mlp\.shared_expert\.gate_proj$'
            - 're:.*\.mlp\.shared_expert\.up_proj$'
          weights:
            num_bits: 4
            type: float
            symmetric: true
            group_size: 16
            strategy: tensor_group
            block_structure: null
            dynamic: false
            actorder: null
            scale_dtype: torch.float8_e4m3fn
            zp_dtype: null
            observer: memoryless_minmax
            observer_kwargs: {}
          input_activations:
            num_bits: 4
            type: float
            symmetric: true
            group_size: 16
            strategy: tensor_group
            block_structure: null
            dynamic: local
            actorder: null
            scale_dtype: torch.float8_e4m3fn
            zp_dtype: null
            observer: minmax
            observer_kwargs: {}
          output_activations: null
          format: null
      targets: [Linear]
      ignore:
        - lm_head
        - 're:model\.embed_tokens$'
        - 're:visual.*'
        - 're:model\.visual.*'
        - 're:.*\.mlp\.gate$'
        - 're:.*\.mlp\.shared_expert_gate$'
        - 're:.*\.linear_attn\.in_proj_b$'
        - 're:.*\.linear_attn\.in_proj_a$'
      bypass_divisibility_checks: false