model.visual.patch_embed.proj.input_quantizer TensorQuantizer(disabled) model.visual.patch_embed.proj.output_quantizer TensorQuantizer(disabled) model.visual.patch_embed.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.0.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.0.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.0.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.0.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.0.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.0.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.0.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.0.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.0.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.0.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.0.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.0.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.0.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.0.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.0.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.0.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.1.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.1.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.1.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.1.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.1.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.1.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.1.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.1.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.1.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.1.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.1.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.1.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.1.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.1.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.1.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.1.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.2.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.2.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.2.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.2.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.2.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.2.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.2.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.2.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.2.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.2.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.2.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.2.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.2.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.2.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.2.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.2.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.3.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.3.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.3.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.3.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.3.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.3.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.3.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.3.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.3.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.3.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.3.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.3.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.3.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.3.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.3.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.3.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.4.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.4.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.4.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.4.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.4.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.4.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.4.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.4.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.4.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.4.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.4.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.4.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.4.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.4.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.4.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.4.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.5.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.5.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.5.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.5.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.5.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.5.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.5.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.5.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.5.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.5.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.5.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.5.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.5.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.5.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.5.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.5.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.6.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.6.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.6.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.6.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.6.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.6.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.6.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.6.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.6.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.6.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.6.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.6.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.6.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.6.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.6.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.6.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.7.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.7.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.7.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.7.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.7.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.7.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.7.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.7.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.7.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.7.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.7.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.7.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.7.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.7.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.7.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.7.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.8.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.8.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.8.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.8.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.8.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.8.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.8.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.8.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.8.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.8.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.8.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.8.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.8.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.8.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.8.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.8.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.9.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.9.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.9.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.9.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.9.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.9.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.9.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.9.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.9.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.9.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.9.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.9.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.9.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.9.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.9.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.9.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.10.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.10.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.10.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.10.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.10.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.10.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.10.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.10.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.10.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.10.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.10.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.10.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.10.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.10.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.10.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.10.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.11.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.11.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.11.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.11.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.11.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.11.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.11.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.11.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.11.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.11.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.11.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.11.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.11.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.11.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.11.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.11.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.12.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.12.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.12.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.12.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.12.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.12.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.12.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.12.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.12.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.12.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.12.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.12.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.12.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.12.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.12.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.12.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.13.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.13.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.13.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.13.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.13.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.13.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.13.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.13.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.13.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.13.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.13.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.13.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.13.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.13.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.13.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.13.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.14.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.14.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.14.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.14.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.14.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.14.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.14.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.14.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.14.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.14.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.14.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.14.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.14.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.14.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.14.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.14.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.15.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.15.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.15.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.15.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.15.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.15.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.15.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.15.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.15.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.15.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.15.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.15.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.15.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.15.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.15.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.15.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.16.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.16.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.16.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.16.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.16.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.16.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.16.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.16.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.16.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.16.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.16.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.16.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.16.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.16.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.16.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.16.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.17.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.17.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.17.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.17.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.17.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.17.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.17.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.17.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.17.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.17.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.17.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.17.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.17.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.17.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.17.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.17.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.18.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.18.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.18.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.18.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.18.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.18.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.18.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.18.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.18.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.18.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.18.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.18.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.18.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.18.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.18.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.18.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.19.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.19.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.19.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.19.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.19.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.19.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.19.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.19.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.19.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.19.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.19.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.19.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.19.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.19.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.19.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.19.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.20.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.20.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.20.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.20.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.20.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.20.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.20.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.20.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.20.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.20.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.20.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.20.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.20.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.20.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.20.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.20.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.21.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.21.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.21.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.21.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.21.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.21.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.21.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.21.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.21.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.21.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.21.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.21.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.21.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.21.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.21.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.21.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.22.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.22.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.22.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.22.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.22.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.22.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.22.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.22.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.22.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.22.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.22.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.22.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.22.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.22.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.22.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.22.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.23.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.23.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.23.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.23.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.23.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.23.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.23.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.23.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.23.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.23.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.23.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.23.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.23.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.23.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.23.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.23.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.24.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.24.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.24.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.24.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.24.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.24.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.24.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.24.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.24.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.24.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.24.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.24.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.24.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.24.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.24.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.24.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.25.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.25.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.25.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.25.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.25.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.25.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.25.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.25.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.25.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.25.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.25.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.25.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.25.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.25.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.25.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.25.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.26.attn.qkv.input_quantizer TensorQuantizer(disabled) model.visual.blocks.26.attn.qkv.output_quantizer TensorQuantizer(disabled) model.visual.blocks.26.attn.qkv.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.26.attn.proj.input_quantizer TensorQuantizer(disabled) model.visual.blocks.26.attn.proj.output_quantizer TensorQuantizer(disabled) model.visual.blocks.26.attn.proj.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.26.attn.q_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.26.attn.k_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.26.attn.v_bmm_quantizer TensorQuantizer(disabled) model.visual.blocks.26.attn.softmax_quantizer TensorQuantizer(disabled) model.visual.blocks.26.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.blocks.26.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.blocks.26.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.blocks.26.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.blocks.26.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.blocks.26.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.visual.merger.linear_fc1.input_quantizer TensorQuantizer(disabled) model.visual.merger.linear_fc1.output_quantizer TensorQuantizer(disabled) model.visual.merger.linear_fc1.weight_quantizer TensorQuantizer(disabled) model.visual.merger.linear_fc2.input_quantizer TensorQuantizer(disabled) model.visual.merger.linear_fc2.output_quantizer TensorQuantizer(disabled) model.visual.merger.linear_fc2.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.0.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.0.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.0.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=10.1875 calibrator=MaxCalibrator quant) model.language_model.layers.0.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.2891 calibrator=MaxCalibrator quant) model.language_model.layers.0.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.7500 calibrator=MaxCalibrator quant) model.language_model.layers.0.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4512 calibrator=MaxCalibrator quant) model.language_model.layers.0.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.7500 calibrator=MaxCalibrator quant) model.language_model.layers.0.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2402 calibrator=MaxCalibrator quant) model.language_model.layers.0.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.7500 calibrator=MaxCalibrator quant) model.language_model.layers.0.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.0564 calibrator=MaxCalibrator quant) model.language_model.layers.0.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.7500 calibrator=MaxCalibrator quant) model.language_model.layers.0.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1592 calibrator=MaxCalibrator quant) model.language_model.layers.0.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=4.5312 calibrator=MaxCalibrator quant) model.language_model.layers.0.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4102 calibrator=MaxCalibrator quant) model.language_model.layers.0.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=4.5312 calibrator=MaxCalibrator quant) model.language_model.layers.0.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1729 calibrator=MaxCalibrator quant) model.language_model.layers.0.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=3.9688 calibrator=MaxCalibrator quant) model.language_model.layers.0.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.0.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9609 calibrator=MaxCalibrator quant) model.language_model.layers.1.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.1.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.1.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=3.8906 calibrator=MaxCalibrator quant) model.language_model.layers.1.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.4688 calibrator=MaxCalibrator quant) model.language_model.layers.1.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=23.3750 calibrator=MaxCalibrator quant) model.language_model.layers.1.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3535 calibrator=MaxCalibrator quant) model.language_model.layers.1.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=23.3750 calibrator=MaxCalibrator quant) model.language_model.layers.1.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2578 calibrator=MaxCalibrator quant) model.language_model.layers.1.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=23.3750 calibrator=MaxCalibrator quant) model.language_model.layers.1.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2793 calibrator=MaxCalibrator quant) model.language_model.layers.1.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=23.3750 calibrator=MaxCalibrator quant) model.language_model.layers.1.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2295 calibrator=MaxCalibrator quant) model.language_model.layers.1.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=3.2031 calibrator=MaxCalibrator quant) model.language_model.layers.1.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2344 calibrator=MaxCalibrator quant) model.language_model.layers.1.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=3.2031 calibrator=MaxCalibrator quant) model.language_model.layers.1.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2148 calibrator=MaxCalibrator quant) model.language_model.layers.1.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=7.5000 calibrator=MaxCalibrator quant) model.language_model.layers.1.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.1.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4629 calibrator=MaxCalibrator quant) model.language_model.layers.2.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.2.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.2.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=7.7188 calibrator=MaxCalibrator quant) model.language_model.layers.2.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.4609 calibrator=MaxCalibrator quant) model.language_model.layers.2.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.2500 calibrator=MaxCalibrator quant) model.language_model.layers.2.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1875 calibrator=MaxCalibrator quant) model.language_model.layers.2.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.2500 calibrator=MaxCalibrator quant) model.language_model.layers.2.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2539 calibrator=MaxCalibrator quant) model.language_model.layers.2.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.2500 calibrator=MaxCalibrator quant) model.language_model.layers.2.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2207 calibrator=MaxCalibrator quant) model.language_model.layers.2.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.2500 calibrator=MaxCalibrator quant) model.language_model.layers.2.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2246 calibrator=MaxCalibrator quant) model.language_model.layers.2.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=4.6875 calibrator=MaxCalibrator quant) model.language_model.layers.2.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3418 calibrator=MaxCalibrator quant) model.language_model.layers.2.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=4.6875 calibrator=MaxCalibrator quant) model.language_model.layers.2.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1982 calibrator=MaxCalibrator quant) model.language_model.layers.2.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=24.5000 calibrator=MaxCalibrator quant) model.language_model.layers.2.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.2.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.1641 calibrator=MaxCalibrator quant) model.language_model.layers.3.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=93.5000 calibrator=MaxCalibrator quant) model.language_model.layers.3.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3203 calibrator=MaxCalibrator quant) model.language_model.layers.3.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=93.5000 calibrator=MaxCalibrator quant) model.language_model.layers.3.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2090 calibrator=MaxCalibrator quant) model.language_model.layers.3.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=93.5000 calibrator=MaxCalibrator quant) model.language_model.layers.3.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2217 calibrator=MaxCalibrator quant) model.language_model.layers.3.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=8.8750 calibrator=MaxCalibrator quant) model.language_model.layers.3.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.3906 calibrator=MaxCalibrator quant) model.language_model.layers.3.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.3.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=11.3125 calibrator=MaxCalibrator quant) model.language_model.layers.3.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=13.1250 calibrator=MaxCalibrator quant) model.language_model.layers.3.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.3.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=5.0000 calibrator=MaxCalibrator quant) model.language_model.layers.3.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.3.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1641 calibrator=MaxCalibrator quant) model.language_model.layers.3.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=5.0000 calibrator=MaxCalibrator quant) model.language_model.layers.3.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.3.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1787 calibrator=MaxCalibrator quant) model.language_model.layers.3.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=24.5000 calibrator=MaxCalibrator quant) model.language_model.layers.3.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.3.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6992 calibrator=MaxCalibrator quant) model.language_model.layers.4.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.4.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.4.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=15.5000 calibrator=MaxCalibrator quant) model.language_model.layers.4.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.3672 calibrator=MaxCalibrator quant) model.language_model.layers.4.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=22.0000 calibrator=MaxCalibrator quant) model.language_model.layers.4.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1953 calibrator=MaxCalibrator quant) model.language_model.layers.4.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=22.0000 calibrator=MaxCalibrator quant) model.language_model.layers.4.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3535 calibrator=MaxCalibrator quant) model.language_model.layers.4.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=22.0000 calibrator=MaxCalibrator quant) model.language_model.layers.4.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2393 calibrator=MaxCalibrator quant) model.language_model.layers.4.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=22.0000 calibrator=MaxCalibrator quant) model.language_model.layers.4.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2734 calibrator=MaxCalibrator quant) model.language_model.layers.4.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=5.0938 calibrator=MaxCalibrator quant) model.language_model.layers.4.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1807 calibrator=MaxCalibrator quant) model.language_model.layers.4.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=5.0938 calibrator=MaxCalibrator quant) model.language_model.layers.4.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1631 calibrator=MaxCalibrator quant) model.language_model.layers.4.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.2500 calibrator=MaxCalibrator quant) model.language_model.layers.4.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.4.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3301 calibrator=MaxCalibrator quant) model.language_model.layers.5.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.5.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.5.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=14.1875 calibrator=MaxCalibrator quant) model.language_model.layers.5.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.3672 calibrator=MaxCalibrator quant) model.language_model.layers.5.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.3750 calibrator=MaxCalibrator quant) model.language_model.layers.5.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2158 calibrator=MaxCalibrator quant) model.language_model.layers.5.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.3750 calibrator=MaxCalibrator quant) model.language_model.layers.5.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2559 calibrator=MaxCalibrator quant) model.language_model.layers.5.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.3750 calibrator=MaxCalibrator quant) model.language_model.layers.5.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2363 calibrator=MaxCalibrator quant) model.language_model.layers.5.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.3750 calibrator=MaxCalibrator quant) model.language_model.layers.5.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2129 calibrator=MaxCalibrator quant) model.language_model.layers.5.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=6.2188 calibrator=MaxCalibrator quant) model.language_model.layers.5.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1445 calibrator=MaxCalibrator quant) model.language_model.layers.5.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=6.2188 calibrator=MaxCalibrator quant) model.language_model.layers.5.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1289 calibrator=MaxCalibrator quant) model.language_model.layers.5.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=36.2500 calibrator=MaxCalibrator quant) model.language_model.layers.5.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.5.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3887 calibrator=MaxCalibrator quant) model.language_model.layers.6.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.6.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.6.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=12.3125 calibrator=MaxCalibrator quant) model.language_model.layers.6.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.3359 calibrator=MaxCalibrator quant) model.language_model.layers.6.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.2500 calibrator=MaxCalibrator quant) model.language_model.layers.6.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2168 calibrator=MaxCalibrator quant) model.language_model.layers.6.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.2500 calibrator=MaxCalibrator quant) model.language_model.layers.6.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2559 calibrator=MaxCalibrator quant) model.language_model.layers.6.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.2500 calibrator=MaxCalibrator quant) model.language_model.layers.6.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1934 calibrator=MaxCalibrator quant) model.language_model.layers.6.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.2500 calibrator=MaxCalibrator quant) model.language_model.layers.6.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1992 calibrator=MaxCalibrator quant) model.language_model.layers.6.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=10.5625 calibrator=MaxCalibrator quant) model.language_model.layers.6.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2324 calibrator=MaxCalibrator quant) model.language_model.layers.6.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=10.5625 calibrator=MaxCalibrator quant) model.language_model.layers.6.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2100 calibrator=MaxCalibrator quant) model.language_model.layers.6.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.1250 calibrator=MaxCalibrator quant) model.language_model.layers.6.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.6.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5703 calibrator=MaxCalibrator quant) model.language_model.layers.7.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=85.5000 calibrator=MaxCalibrator quant) model.language_model.layers.7.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3555 calibrator=MaxCalibrator quant) model.language_model.layers.7.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=85.5000 calibrator=MaxCalibrator quant) model.language_model.layers.7.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2852 calibrator=MaxCalibrator quant) model.language_model.layers.7.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=85.5000 calibrator=MaxCalibrator quant) model.language_model.layers.7.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2930 calibrator=MaxCalibrator quant) model.language_model.layers.7.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=8.9375 calibrator=MaxCalibrator quant) model.language_model.layers.7.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.3438 calibrator=MaxCalibrator quant) model.language_model.layers.7.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.7.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=14.6250 calibrator=MaxCalibrator quant) model.language_model.layers.7.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=15.0625 calibrator=MaxCalibrator quant) model.language_model.layers.7.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.7.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=9.6250 calibrator=MaxCalibrator quant) model.language_model.layers.7.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.7.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2305 calibrator=MaxCalibrator quant) model.language_model.layers.7.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=9.6250 calibrator=MaxCalibrator quant) model.language_model.layers.7.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.7.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1270 calibrator=MaxCalibrator quant) model.language_model.layers.7.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.7500 calibrator=MaxCalibrator quant) model.language_model.layers.7.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.7.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3613 calibrator=MaxCalibrator quant) model.language_model.layers.8.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.8.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.8.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=18.3750 calibrator=MaxCalibrator quant) model.language_model.layers.8.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.3594 calibrator=MaxCalibrator quant) model.language_model.layers.8.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.0000 calibrator=MaxCalibrator quant) model.language_model.layers.8.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2295 calibrator=MaxCalibrator quant) model.language_model.layers.8.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.0000 calibrator=MaxCalibrator quant) model.language_model.layers.8.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2090 calibrator=MaxCalibrator quant) model.language_model.layers.8.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.0000 calibrator=MaxCalibrator quant) model.language_model.layers.8.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1953 calibrator=MaxCalibrator quant) model.language_model.layers.8.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.0000 calibrator=MaxCalibrator quant) model.language_model.layers.8.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2227 calibrator=MaxCalibrator quant) model.language_model.layers.8.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=8.1250 calibrator=MaxCalibrator quant) model.language_model.layers.8.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2188 calibrator=MaxCalibrator quant) model.language_model.layers.8.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=8.1250 calibrator=MaxCalibrator quant) model.language_model.layers.8.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1387 calibrator=MaxCalibrator quant) model.language_model.layers.8.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.8750 calibrator=MaxCalibrator quant) model.language_model.layers.8.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.8.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4375 calibrator=MaxCalibrator quant) model.language_model.layers.9.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.9.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.9.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=14.6875 calibrator=MaxCalibrator quant) model.language_model.layers.9.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.2344 calibrator=MaxCalibrator quant) model.language_model.layers.9.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.8750 calibrator=MaxCalibrator quant) model.language_model.layers.9.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1885 calibrator=MaxCalibrator quant) model.language_model.layers.9.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.8750 calibrator=MaxCalibrator quant) model.language_model.layers.9.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2422 calibrator=MaxCalibrator quant) model.language_model.layers.9.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.8750 calibrator=MaxCalibrator quant) model.language_model.layers.9.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1982 calibrator=MaxCalibrator quant) model.language_model.layers.9.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.8750 calibrator=MaxCalibrator quant) model.language_model.layers.9.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2246 calibrator=MaxCalibrator quant) model.language_model.layers.9.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=8.0625 calibrator=MaxCalibrator quant) model.language_model.layers.9.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1729 calibrator=MaxCalibrator quant) model.language_model.layers.9.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=8.0625 calibrator=MaxCalibrator quant) model.language_model.layers.9.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1240 calibrator=MaxCalibrator quant) model.language_model.layers.9.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=32.0000 calibrator=MaxCalibrator quant) model.language_model.layers.9.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.9.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4473 calibrator=MaxCalibrator quant) model.language_model.layers.10.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.10.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.10.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=21.7500 calibrator=MaxCalibrator quant) model.language_model.layers.10.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.0234 calibrator=MaxCalibrator quant) model.language_model.layers.10.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=32.0000 calibrator=MaxCalibrator quant) model.language_model.layers.10.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3633 calibrator=MaxCalibrator quant) model.language_model.layers.10.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=32.0000 calibrator=MaxCalibrator quant) model.language_model.layers.10.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2891 calibrator=MaxCalibrator quant) model.language_model.layers.10.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=32.0000 calibrator=MaxCalibrator quant) model.language_model.layers.10.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1807 calibrator=MaxCalibrator quant) model.language_model.layers.10.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=32.0000 calibrator=MaxCalibrator quant) model.language_model.layers.10.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2324 calibrator=MaxCalibrator quant) model.language_model.layers.10.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=12.0000 calibrator=MaxCalibrator quant) model.language_model.layers.10.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2480 calibrator=MaxCalibrator quant) model.language_model.layers.10.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=12.0000 calibrator=MaxCalibrator quant) model.language_model.layers.10.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1289 calibrator=MaxCalibrator quant) model.language_model.layers.10.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=22.2500 calibrator=MaxCalibrator quant) model.language_model.layers.10.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.10.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2793 calibrator=MaxCalibrator quant) model.language_model.layers.11.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.5000 calibrator=MaxCalibrator quant) model.language_model.layers.11.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4824 calibrator=MaxCalibrator quant) model.language_model.layers.11.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.5000 calibrator=MaxCalibrator quant) model.language_model.layers.11.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2832 calibrator=MaxCalibrator quant) model.language_model.layers.11.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.5000 calibrator=MaxCalibrator quant) model.language_model.layers.11.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2578 calibrator=MaxCalibrator quant) model.language_model.layers.11.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=5.3438 calibrator=MaxCalibrator quant) model.language_model.layers.11.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9219 calibrator=MaxCalibrator quant) model.language_model.layers.11.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.11.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=14.4375 calibrator=MaxCalibrator quant) model.language_model.layers.11.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=11.4375 calibrator=MaxCalibrator quant) model.language_model.layers.11.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.11.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=12.5000 calibrator=MaxCalibrator quant) model.language_model.layers.11.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.11.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2021 calibrator=MaxCalibrator quant) model.language_model.layers.11.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=12.5000 calibrator=MaxCalibrator quant) model.language_model.layers.11.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.11.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1201 calibrator=MaxCalibrator quant) model.language_model.layers.11.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=33.2500 calibrator=MaxCalibrator quant) model.language_model.layers.11.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.11.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5977 calibrator=MaxCalibrator quant) model.language_model.layers.12.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.12.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.12.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=15.1250 calibrator=MaxCalibrator quant) model.language_model.layers.12.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9492 calibrator=MaxCalibrator quant) model.language_model.layers.12.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.7500 calibrator=MaxCalibrator quant) model.language_model.layers.12.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2285 calibrator=MaxCalibrator quant) model.language_model.layers.12.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.7500 calibrator=MaxCalibrator quant) model.language_model.layers.12.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2227 calibrator=MaxCalibrator quant) model.language_model.layers.12.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.7500 calibrator=MaxCalibrator quant) model.language_model.layers.12.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2012 calibrator=MaxCalibrator quant) model.language_model.layers.12.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.7500 calibrator=MaxCalibrator quant) model.language_model.layers.12.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2041 calibrator=MaxCalibrator quant) model.language_model.layers.12.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=13.1250 calibrator=MaxCalibrator quant) model.language_model.layers.12.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1895 calibrator=MaxCalibrator quant) model.language_model.layers.12.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=13.1250 calibrator=MaxCalibrator quant) model.language_model.layers.12.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1133 calibrator=MaxCalibrator quant) model.language_model.layers.12.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=40.2500 calibrator=MaxCalibrator quant) model.language_model.layers.12.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.12.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2246 calibrator=MaxCalibrator quant) model.language_model.layers.13.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.13.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.13.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=32.2500 calibrator=MaxCalibrator quant) model.language_model.layers.13.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9102 calibrator=MaxCalibrator quant) model.language_model.layers.13.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.2500 calibrator=MaxCalibrator quant) model.language_model.layers.13.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2139 calibrator=MaxCalibrator quant) model.language_model.layers.13.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.2500 calibrator=MaxCalibrator quant) model.language_model.layers.13.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2988 calibrator=MaxCalibrator quant) model.language_model.layers.13.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.2500 calibrator=MaxCalibrator quant) model.language_model.layers.13.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2158 calibrator=MaxCalibrator quant) model.language_model.layers.13.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.2500 calibrator=MaxCalibrator quant) model.language_model.layers.13.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2207 calibrator=MaxCalibrator quant) model.language_model.layers.13.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=13.6250 calibrator=MaxCalibrator quant) model.language_model.layers.13.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1572 calibrator=MaxCalibrator quant) model.language_model.layers.13.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=13.6250 calibrator=MaxCalibrator quant) model.language_model.layers.13.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1562 calibrator=MaxCalibrator quant) model.language_model.layers.13.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.8750 calibrator=MaxCalibrator quant) model.language_model.layers.13.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.13.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2480 calibrator=MaxCalibrator quant) model.language_model.layers.14.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.14.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.14.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=12.7500 calibrator=MaxCalibrator quant) model.language_model.layers.14.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.0469 calibrator=MaxCalibrator quant) model.language_model.layers.14.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.0000 calibrator=MaxCalibrator quant) model.language_model.layers.14.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2910 calibrator=MaxCalibrator quant) model.language_model.layers.14.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.0000 calibrator=MaxCalibrator quant) model.language_model.layers.14.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2812 calibrator=MaxCalibrator quant) model.language_model.layers.14.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.0000 calibrator=MaxCalibrator quant) model.language_model.layers.14.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2266 calibrator=MaxCalibrator quant) model.language_model.layers.14.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.0000 calibrator=MaxCalibrator quant) model.language_model.layers.14.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1807 calibrator=MaxCalibrator quant) model.language_model.layers.14.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=13.7500 calibrator=MaxCalibrator quant) model.language_model.layers.14.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1279 calibrator=MaxCalibrator quant) model.language_model.layers.14.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=13.7500 calibrator=MaxCalibrator quant) model.language_model.layers.14.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1357 calibrator=MaxCalibrator quant) model.language_model.layers.14.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.0000 calibrator=MaxCalibrator quant) model.language_model.layers.14.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.14.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2412 calibrator=MaxCalibrator quant) model.language_model.layers.15.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=62.0000 calibrator=MaxCalibrator quant) model.language_model.layers.15.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1797 calibrator=MaxCalibrator quant) model.language_model.layers.15.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=62.0000 calibrator=MaxCalibrator quant) model.language_model.layers.15.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4434 calibrator=MaxCalibrator quant) model.language_model.layers.15.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=62.0000 calibrator=MaxCalibrator quant) model.language_model.layers.15.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1494 calibrator=MaxCalibrator quant) model.language_model.layers.15.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=8.6250 calibrator=MaxCalibrator quant) model.language_model.layers.15.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.8867 calibrator=MaxCalibrator quant) model.language_model.layers.15.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.15.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=18.1250 calibrator=MaxCalibrator quant) model.language_model.layers.15.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=11.6250 calibrator=MaxCalibrator quant) model.language_model.layers.15.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.15.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=17.6250 calibrator=MaxCalibrator quant) model.language_model.layers.15.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.15.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1553 calibrator=MaxCalibrator quant) model.language_model.layers.15.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=17.6250 calibrator=MaxCalibrator quant) model.language_model.layers.15.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.15.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1260 calibrator=MaxCalibrator quant) model.language_model.layers.15.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=22.1250 calibrator=MaxCalibrator quant) model.language_model.layers.15.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.15.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2344 calibrator=MaxCalibrator quant) model.language_model.layers.16.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.16.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.16.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=12.7500 calibrator=MaxCalibrator quant) model.language_model.layers.16.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9453 calibrator=MaxCalibrator quant) model.language_model.layers.16.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.7500 calibrator=MaxCalibrator quant) model.language_model.layers.16.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2344 calibrator=MaxCalibrator quant) model.language_model.layers.16.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.7500 calibrator=MaxCalibrator quant) model.language_model.layers.16.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2207 calibrator=MaxCalibrator quant) model.language_model.layers.16.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.7500 calibrator=MaxCalibrator quant) model.language_model.layers.16.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1572 calibrator=MaxCalibrator quant) model.language_model.layers.16.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.7500 calibrator=MaxCalibrator quant) model.language_model.layers.16.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1992 calibrator=MaxCalibrator quant) model.language_model.layers.16.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=18.6250 calibrator=MaxCalibrator quant) model.language_model.layers.16.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1465 calibrator=MaxCalibrator quant) model.language_model.layers.16.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=18.6250 calibrator=MaxCalibrator quant) model.language_model.layers.16.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1211 calibrator=MaxCalibrator quant) model.language_model.layers.16.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=20.3750 calibrator=MaxCalibrator quant) model.language_model.layers.16.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.16.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3398 calibrator=MaxCalibrator quant) model.language_model.layers.17.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.17.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.17.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=12.7500 calibrator=MaxCalibrator quant) model.language_model.layers.17.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9648 calibrator=MaxCalibrator quant) model.language_model.layers.17.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.2500 calibrator=MaxCalibrator quant) model.language_model.layers.17.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2480 calibrator=MaxCalibrator quant) model.language_model.layers.17.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.2500 calibrator=MaxCalibrator quant) model.language_model.layers.17.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2188 calibrator=MaxCalibrator quant) model.language_model.layers.17.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.2500 calibrator=MaxCalibrator quant) model.language_model.layers.17.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2402 calibrator=MaxCalibrator quant) model.language_model.layers.17.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.2500 calibrator=MaxCalibrator quant) model.language_model.layers.17.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2227 calibrator=MaxCalibrator quant) model.language_model.layers.17.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=19.2500 calibrator=MaxCalibrator quant) model.language_model.layers.17.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1953 calibrator=MaxCalibrator quant) model.language_model.layers.17.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=19.2500 calibrator=MaxCalibrator quant) model.language_model.layers.17.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1445 calibrator=MaxCalibrator quant) model.language_model.layers.17.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=22.5000 calibrator=MaxCalibrator quant) model.language_model.layers.17.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.17.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2197 calibrator=MaxCalibrator quant) model.language_model.layers.18.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.18.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.18.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.0000 calibrator=MaxCalibrator quant) model.language_model.layers.18.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.2031 calibrator=MaxCalibrator quant) model.language_model.layers.18.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.5000 calibrator=MaxCalibrator quant) model.language_model.layers.18.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2393 calibrator=MaxCalibrator quant) model.language_model.layers.18.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.5000 calibrator=MaxCalibrator quant) model.language_model.layers.18.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2949 calibrator=MaxCalibrator quant) model.language_model.layers.18.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.5000 calibrator=MaxCalibrator quant) model.language_model.layers.18.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2031 calibrator=MaxCalibrator quant) model.language_model.layers.18.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.5000 calibrator=MaxCalibrator quant) model.language_model.layers.18.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1719 calibrator=MaxCalibrator quant) model.language_model.layers.18.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.7500 calibrator=MaxCalibrator quant) model.language_model.layers.18.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2578 calibrator=MaxCalibrator quant) model.language_model.layers.18.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.7500 calibrator=MaxCalibrator quant) model.language_model.layers.18.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2256 calibrator=MaxCalibrator quant) model.language_model.layers.18.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=27.7500 calibrator=MaxCalibrator quant) model.language_model.layers.18.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.18.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5781 calibrator=MaxCalibrator quant) model.language_model.layers.19.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=53.0000 calibrator=MaxCalibrator quant) model.language_model.layers.19.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2734 calibrator=MaxCalibrator quant) model.language_model.layers.19.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=53.0000 calibrator=MaxCalibrator quant) model.language_model.layers.19.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3906 calibrator=MaxCalibrator quant) model.language_model.layers.19.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=53.0000 calibrator=MaxCalibrator quant) model.language_model.layers.19.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2402 calibrator=MaxCalibrator quant) model.language_model.layers.19.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=7.8438 calibrator=MaxCalibrator quant) model.language_model.layers.19.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9219 calibrator=MaxCalibrator quant) model.language_model.layers.19.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.19.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=18.1250 calibrator=MaxCalibrator quant) model.language_model.layers.19.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=12.6875 calibrator=MaxCalibrator quant) model.language_model.layers.19.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.19.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.1250 calibrator=MaxCalibrator quant) model.language_model.layers.19.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.19.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2119 calibrator=MaxCalibrator quant) model.language_model.layers.19.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.1250 calibrator=MaxCalibrator quant) model.language_model.layers.19.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.19.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1426 calibrator=MaxCalibrator quant) model.language_model.layers.19.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.7500 calibrator=MaxCalibrator quant) model.language_model.layers.19.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.19.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5391 calibrator=MaxCalibrator quant) model.language_model.layers.20.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.20.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.20.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.5000 calibrator=MaxCalibrator quant) model.language_model.layers.20.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.2266 calibrator=MaxCalibrator quant) model.language_model.layers.20.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=60.5000 calibrator=MaxCalibrator quant) model.language_model.layers.20.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1992 calibrator=MaxCalibrator quant) model.language_model.layers.20.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=60.5000 calibrator=MaxCalibrator quant) model.language_model.layers.20.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1719 calibrator=MaxCalibrator quant) model.language_model.layers.20.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=60.5000 calibrator=MaxCalibrator quant) model.language_model.layers.20.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1943 calibrator=MaxCalibrator quant) model.language_model.layers.20.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=60.5000 calibrator=MaxCalibrator quant) model.language_model.layers.20.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2217 calibrator=MaxCalibrator quant) model.language_model.layers.20.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=20.7500 calibrator=MaxCalibrator quant) model.language_model.layers.20.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2393 calibrator=MaxCalibrator quant) model.language_model.layers.20.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=20.7500 calibrator=MaxCalibrator quant) model.language_model.layers.20.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1328 calibrator=MaxCalibrator quant) model.language_model.layers.20.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.7500 calibrator=MaxCalibrator quant) model.language_model.layers.20.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.20.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3633 calibrator=MaxCalibrator quant) model.language_model.layers.21.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.21.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.21.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=99.0000 calibrator=MaxCalibrator quant) model.language_model.layers.21.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.1250 calibrator=MaxCalibrator quant) model.language_model.layers.21.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=63.5000 calibrator=MaxCalibrator quant) model.language_model.layers.21.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4883 calibrator=MaxCalibrator quant) model.language_model.layers.21.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=63.5000 calibrator=MaxCalibrator quant) model.language_model.layers.21.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2441 calibrator=MaxCalibrator quant) model.language_model.layers.21.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=63.5000 calibrator=MaxCalibrator quant) model.language_model.layers.21.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1572 calibrator=MaxCalibrator quant) model.language_model.layers.21.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=63.5000 calibrator=MaxCalibrator quant) model.language_model.layers.21.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1709 calibrator=MaxCalibrator quant) model.language_model.layers.21.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.0000 calibrator=MaxCalibrator quant) model.language_model.layers.21.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2559 calibrator=MaxCalibrator quant) model.language_model.layers.21.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.0000 calibrator=MaxCalibrator quant) model.language_model.layers.21.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1445 calibrator=MaxCalibrator quant) model.language_model.layers.21.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=49.7500 calibrator=MaxCalibrator quant) model.language_model.layers.21.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.21.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2461 calibrator=MaxCalibrator quant) model.language_model.layers.22.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.22.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.22.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.5000 calibrator=MaxCalibrator quant) model.language_model.layers.22.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.0859 calibrator=MaxCalibrator quant) model.language_model.layers.22.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=59.5000 calibrator=MaxCalibrator quant) model.language_model.layers.22.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2520 calibrator=MaxCalibrator quant) model.language_model.layers.22.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=59.5000 calibrator=MaxCalibrator quant) model.language_model.layers.22.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1758 calibrator=MaxCalibrator quant) model.language_model.layers.22.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=59.5000 calibrator=MaxCalibrator quant) model.language_model.layers.22.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1768 calibrator=MaxCalibrator quant) model.language_model.layers.22.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=59.5000 calibrator=MaxCalibrator quant) model.language_model.layers.22.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2539 calibrator=MaxCalibrator quant) model.language_model.layers.22.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.1250 calibrator=MaxCalibrator quant) model.language_model.layers.22.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2422 calibrator=MaxCalibrator quant) model.language_model.layers.22.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.1250 calibrator=MaxCalibrator quant) model.language_model.layers.22.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2266 calibrator=MaxCalibrator quant) model.language_model.layers.22.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=53.7500 calibrator=MaxCalibrator quant) model.language_model.layers.22.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.22.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4980 calibrator=MaxCalibrator quant) model.language_model.layers.23.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=69.5000 calibrator=MaxCalibrator quant) model.language_model.layers.23.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2969 calibrator=MaxCalibrator quant) model.language_model.layers.23.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=69.5000 calibrator=MaxCalibrator quant) model.language_model.layers.23.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3984 calibrator=MaxCalibrator quant) model.language_model.layers.23.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=69.5000 calibrator=MaxCalibrator quant) model.language_model.layers.23.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1826 calibrator=MaxCalibrator quant) model.language_model.layers.23.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=16.1250 calibrator=MaxCalibrator quant) model.language_model.layers.23.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.0625 calibrator=MaxCalibrator quant) model.language_model.layers.23.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.23.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=20.2500 calibrator=MaxCalibrator quant) model.language_model.layers.23.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=19.1250 calibrator=MaxCalibrator quant) model.language_model.layers.23.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.23.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=27.1250 calibrator=MaxCalibrator quant) model.language_model.layers.23.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.23.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1309 calibrator=MaxCalibrator quant) model.language_model.layers.23.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=27.1250 calibrator=MaxCalibrator quant) model.language_model.layers.23.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.23.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1147 calibrator=MaxCalibrator quant) model.language_model.layers.23.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.5000 calibrator=MaxCalibrator quant) model.language_model.layers.23.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.23.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6133 calibrator=MaxCalibrator quant) model.language_model.layers.24.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.24.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.24.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.0000 calibrator=MaxCalibrator quant) model.language_model.layers.24.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9883 calibrator=MaxCalibrator quant) model.language_model.layers.24.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=75.5000 calibrator=MaxCalibrator quant) model.language_model.layers.24.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2207 calibrator=MaxCalibrator quant) model.language_model.layers.24.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=75.5000 calibrator=MaxCalibrator quant) model.language_model.layers.24.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2324 calibrator=MaxCalibrator quant) model.language_model.layers.24.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=75.5000 calibrator=MaxCalibrator quant) model.language_model.layers.24.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1963 calibrator=MaxCalibrator quant) model.language_model.layers.24.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=75.5000 calibrator=MaxCalibrator quant) model.language_model.layers.24.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1934 calibrator=MaxCalibrator quant) model.language_model.layers.24.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.7500 calibrator=MaxCalibrator quant) model.language_model.layers.24.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1602 calibrator=MaxCalibrator quant) model.language_model.layers.24.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.7500 calibrator=MaxCalibrator quant) model.language_model.layers.24.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1787 calibrator=MaxCalibrator quant) model.language_model.layers.24.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.7500 calibrator=MaxCalibrator quant) model.language_model.layers.24.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.24.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7422 calibrator=MaxCalibrator quant) model.language_model.layers.25.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.25.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.25.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=23.2500 calibrator=MaxCalibrator quant) model.language_model.layers.25.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.8125 calibrator=MaxCalibrator quant) model.language_model.layers.25.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=76.5000 calibrator=MaxCalibrator quant) model.language_model.layers.25.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2734 calibrator=MaxCalibrator quant) model.language_model.layers.25.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=76.5000 calibrator=MaxCalibrator quant) model.language_model.layers.25.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3770 calibrator=MaxCalibrator quant) model.language_model.layers.25.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=76.5000 calibrator=MaxCalibrator quant) model.language_model.layers.25.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1904 calibrator=MaxCalibrator quant) model.language_model.layers.25.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=76.5000 calibrator=MaxCalibrator quant) model.language_model.layers.25.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1709 calibrator=MaxCalibrator quant) model.language_model.layers.25.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.6250 calibrator=MaxCalibrator quant) model.language_model.layers.25.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1494 calibrator=MaxCalibrator quant) model.language_model.layers.25.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.6250 calibrator=MaxCalibrator quant) model.language_model.layers.25.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1572 calibrator=MaxCalibrator quant) model.language_model.layers.25.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=48.7500 calibrator=MaxCalibrator quant) model.language_model.layers.25.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.25.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5156 calibrator=MaxCalibrator quant) model.language_model.layers.26.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.26.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.26.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=14.8125 calibrator=MaxCalibrator quant) model.language_model.layers.26.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.8047 calibrator=MaxCalibrator quant) model.language_model.layers.26.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.5000 calibrator=MaxCalibrator quant) model.language_model.layers.26.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2910 calibrator=MaxCalibrator quant) model.language_model.layers.26.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.5000 calibrator=MaxCalibrator quant) model.language_model.layers.26.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3477 calibrator=MaxCalibrator quant) model.language_model.layers.26.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.5000 calibrator=MaxCalibrator quant) model.language_model.layers.26.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1846 calibrator=MaxCalibrator quant) model.language_model.layers.26.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.5000 calibrator=MaxCalibrator quant) model.language_model.layers.26.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2334 calibrator=MaxCalibrator quant) model.language_model.layers.26.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=36.2500 calibrator=MaxCalibrator quant) model.language_model.layers.26.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1631 calibrator=MaxCalibrator quant) model.language_model.layers.26.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=36.2500 calibrator=MaxCalibrator quant) model.language_model.layers.26.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1895 calibrator=MaxCalibrator quant) model.language_model.layers.26.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.0000 calibrator=MaxCalibrator quant) model.language_model.layers.26.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.26.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6523 calibrator=MaxCalibrator quant) model.language_model.layers.27.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=88.0000 calibrator=MaxCalibrator quant) model.language_model.layers.27.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3262 calibrator=MaxCalibrator quant) model.language_model.layers.27.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=88.0000 calibrator=MaxCalibrator quant) model.language_model.layers.27.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3809 calibrator=MaxCalibrator quant) model.language_model.layers.27.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=88.0000 calibrator=MaxCalibrator quant) model.language_model.layers.27.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2637 calibrator=MaxCalibrator quant) model.language_model.layers.27.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=10.3750 calibrator=MaxCalibrator quant) model.language_model.layers.27.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5938 calibrator=MaxCalibrator quant) model.language_model.layers.27.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.27.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=21.0000 calibrator=MaxCalibrator quant) model.language_model.layers.27.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=25.8750 calibrator=MaxCalibrator quant) model.language_model.layers.27.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.27.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.0000 calibrator=MaxCalibrator quant) model.language_model.layers.27.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.27.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1777 calibrator=MaxCalibrator quant) model.language_model.layers.27.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.0000 calibrator=MaxCalibrator quant) model.language_model.layers.27.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.27.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1895 calibrator=MaxCalibrator quant) model.language_model.layers.27.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=26.7500 calibrator=MaxCalibrator quant) model.language_model.layers.27.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.27.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5430 calibrator=MaxCalibrator quant) model.language_model.layers.28.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.28.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.28.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=13.8750 calibrator=MaxCalibrator quant) model.language_model.layers.28.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.8359 calibrator=MaxCalibrator quant) model.language_model.layers.28.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.5000 calibrator=MaxCalibrator quant) model.language_model.layers.28.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3398 calibrator=MaxCalibrator quant) model.language_model.layers.28.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.5000 calibrator=MaxCalibrator quant) model.language_model.layers.28.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3125 calibrator=MaxCalibrator quant) model.language_model.layers.28.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.5000 calibrator=MaxCalibrator quant) model.language_model.layers.28.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1533 calibrator=MaxCalibrator quant) model.language_model.layers.28.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.5000 calibrator=MaxCalibrator quant) model.language_model.layers.28.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1875 calibrator=MaxCalibrator quant) model.language_model.layers.28.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.7500 calibrator=MaxCalibrator quant) model.language_model.layers.28.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2080 calibrator=MaxCalibrator quant) model.language_model.layers.28.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=31.7500 calibrator=MaxCalibrator quant) model.language_model.layers.28.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1484 calibrator=MaxCalibrator quant) model.language_model.layers.28.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=27.5000 calibrator=MaxCalibrator quant) model.language_model.layers.28.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.28.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3926 calibrator=MaxCalibrator quant) model.language_model.layers.29.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.29.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.29.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.8750 calibrator=MaxCalibrator quant) model.language_model.layers.29.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6797 calibrator=MaxCalibrator quant) model.language_model.layers.29.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=72.5000 calibrator=MaxCalibrator quant) model.language_model.layers.29.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2598 calibrator=MaxCalibrator quant) model.language_model.layers.29.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=72.5000 calibrator=MaxCalibrator quant) model.language_model.layers.29.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2451 calibrator=MaxCalibrator quant) model.language_model.layers.29.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=72.5000 calibrator=MaxCalibrator quant) model.language_model.layers.29.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2217 calibrator=MaxCalibrator quant) model.language_model.layers.29.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=72.5000 calibrator=MaxCalibrator quant) model.language_model.layers.29.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1982 calibrator=MaxCalibrator quant) model.language_model.layers.29.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=33.0000 calibrator=MaxCalibrator quant) model.language_model.layers.29.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1621 calibrator=MaxCalibrator quant) model.language_model.layers.29.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=33.0000 calibrator=MaxCalibrator quant) model.language_model.layers.29.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2695 calibrator=MaxCalibrator quant) model.language_model.layers.29.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=32.7500 calibrator=MaxCalibrator quant) model.language_model.layers.29.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.29.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3770 calibrator=MaxCalibrator quant) model.language_model.layers.30.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.30.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.30.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=16.5000 calibrator=MaxCalibrator quant) model.language_model.layers.30.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5273 calibrator=MaxCalibrator quant) model.language_model.layers.30.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=80.5000 calibrator=MaxCalibrator quant) model.language_model.layers.30.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3535 calibrator=MaxCalibrator quant) model.language_model.layers.30.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=80.5000 calibrator=MaxCalibrator quant) model.language_model.layers.30.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4746 calibrator=MaxCalibrator quant) model.language_model.layers.30.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=80.5000 calibrator=MaxCalibrator quant) model.language_model.layers.30.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1582 calibrator=MaxCalibrator quant) model.language_model.layers.30.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=80.5000 calibrator=MaxCalibrator quant) model.language_model.layers.30.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1465 calibrator=MaxCalibrator quant) model.language_model.layers.30.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.7500 calibrator=MaxCalibrator quant) model.language_model.layers.30.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2139 calibrator=MaxCalibrator quant) model.language_model.layers.30.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.7500 calibrator=MaxCalibrator quant) model.language_model.layers.30.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3281 calibrator=MaxCalibrator quant) model.language_model.layers.30.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=23.5000 calibrator=MaxCalibrator quant) model.language_model.layers.30.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.30.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6523 calibrator=MaxCalibrator quant) model.language_model.layers.31.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=91.5000 calibrator=MaxCalibrator quant) model.language_model.layers.31.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2910 calibrator=MaxCalibrator quant) model.language_model.layers.31.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=91.5000 calibrator=MaxCalibrator quant) model.language_model.layers.31.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3184 calibrator=MaxCalibrator quant) model.language_model.layers.31.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=91.5000 calibrator=MaxCalibrator quant) model.language_model.layers.31.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3652 calibrator=MaxCalibrator quant) model.language_model.layers.31.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=7.9375 calibrator=MaxCalibrator quant) model.language_model.layers.31.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5938 calibrator=MaxCalibrator quant) model.language_model.layers.31.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.31.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=21.2500 calibrator=MaxCalibrator quant) model.language_model.layers.31.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=36.2500 calibrator=MaxCalibrator quant) model.language_model.layers.31.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.31.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.0000 calibrator=MaxCalibrator quant) model.language_model.layers.31.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.31.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1748 calibrator=MaxCalibrator quant) model.language_model.layers.31.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.0000 calibrator=MaxCalibrator quant) model.language_model.layers.31.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.31.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1504 calibrator=MaxCalibrator quant) model.language_model.layers.31.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=23.3750 calibrator=MaxCalibrator quant) model.language_model.layers.31.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.31.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6680 calibrator=MaxCalibrator quant) model.language_model.layers.32.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.32.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.32.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=12.9375 calibrator=MaxCalibrator quant) model.language_model.layers.32.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7812 calibrator=MaxCalibrator quant) model.language_model.layers.32.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.5000 calibrator=MaxCalibrator quant) model.language_model.layers.32.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2334 calibrator=MaxCalibrator quant) model.language_model.layers.32.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.5000 calibrator=MaxCalibrator quant) model.language_model.layers.32.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3086 calibrator=MaxCalibrator quant) model.language_model.layers.32.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.5000 calibrator=MaxCalibrator quant) model.language_model.layers.32.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1904 calibrator=MaxCalibrator quant) model.language_model.layers.32.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.5000 calibrator=MaxCalibrator quant) model.language_model.layers.32.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2041 calibrator=MaxCalibrator quant) model.language_model.layers.32.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.0000 calibrator=MaxCalibrator quant) model.language_model.layers.32.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3203 calibrator=MaxCalibrator quant) model.language_model.layers.32.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.0000 calibrator=MaxCalibrator quant) model.language_model.layers.32.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1699 calibrator=MaxCalibrator quant) model.language_model.layers.32.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=25.2500 calibrator=MaxCalibrator quant) model.language_model.layers.32.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.32.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5664 calibrator=MaxCalibrator quant) model.language_model.layers.33.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.33.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.33.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=15.9375 calibrator=MaxCalibrator quant) model.language_model.layers.33.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6875 calibrator=MaxCalibrator quant) model.language_model.layers.33.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=69.0000 calibrator=MaxCalibrator quant) model.language_model.layers.33.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3145 calibrator=MaxCalibrator quant) model.language_model.layers.33.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=69.0000 calibrator=MaxCalibrator quant) model.language_model.layers.33.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2930 calibrator=MaxCalibrator quant) model.language_model.layers.33.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=69.0000 calibrator=MaxCalibrator quant) model.language_model.layers.33.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1621 calibrator=MaxCalibrator quant) model.language_model.layers.33.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=69.0000 calibrator=MaxCalibrator quant) model.language_model.layers.33.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2041 calibrator=MaxCalibrator quant) model.language_model.layers.33.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=35.7500 calibrator=MaxCalibrator quant) model.language_model.layers.33.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2422 calibrator=MaxCalibrator quant) model.language_model.layers.33.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=35.7500 calibrator=MaxCalibrator quant) model.language_model.layers.33.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2969 calibrator=MaxCalibrator quant) model.language_model.layers.33.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=24.1250 calibrator=MaxCalibrator quant) model.language_model.layers.33.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.33.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3242 calibrator=MaxCalibrator quant) model.language_model.layers.34.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.34.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.34.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=21.1250 calibrator=MaxCalibrator quant) model.language_model.layers.34.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9375 calibrator=MaxCalibrator quant) model.language_model.layers.34.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.34.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2354 calibrator=MaxCalibrator quant) model.language_model.layers.34.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.34.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4199 calibrator=MaxCalibrator quant) model.language_model.layers.34.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.34.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2158 calibrator=MaxCalibrator quant) model.language_model.layers.34.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.34.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1885 calibrator=MaxCalibrator quant) model.language_model.layers.34.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.2500 calibrator=MaxCalibrator quant) model.language_model.layers.34.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3477 calibrator=MaxCalibrator quant) model.language_model.layers.34.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.2500 calibrator=MaxCalibrator quant) model.language_model.layers.34.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2266 calibrator=MaxCalibrator quant) model.language_model.layers.34.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=35.2500 calibrator=MaxCalibrator quant) model.language_model.layers.34.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.34.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5781 calibrator=MaxCalibrator quant) model.language_model.layers.35.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=83.5000 calibrator=MaxCalibrator quant) model.language_model.layers.35.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3086 calibrator=MaxCalibrator quant) model.language_model.layers.35.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=83.5000 calibrator=MaxCalibrator quant) model.language_model.layers.35.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2969 calibrator=MaxCalibrator quant) model.language_model.layers.35.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=83.5000 calibrator=MaxCalibrator quant) model.language_model.layers.35.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3516 calibrator=MaxCalibrator quant) model.language_model.layers.35.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=8.3125 calibrator=MaxCalibrator quant) model.language_model.layers.35.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9609 calibrator=MaxCalibrator quant) model.language_model.layers.35.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.35.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=18.8750 calibrator=MaxCalibrator quant) model.language_model.layers.35.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=32.5000 calibrator=MaxCalibrator quant) model.language_model.layers.35.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.35.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.0000 calibrator=MaxCalibrator quant) model.language_model.layers.35.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.35.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3203 calibrator=MaxCalibrator quant) model.language_model.layers.35.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.0000 calibrator=MaxCalibrator quant) model.language_model.layers.35.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.35.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2168 calibrator=MaxCalibrator quant) model.language_model.layers.35.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=40.0000 calibrator=MaxCalibrator quant) model.language_model.layers.35.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.35.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6602 calibrator=MaxCalibrator quant) model.language_model.layers.36.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.36.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.36.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=54.2500 calibrator=MaxCalibrator quant) model.language_model.layers.36.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.0312 calibrator=MaxCalibrator quant) model.language_model.layers.36.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=83.5000 calibrator=MaxCalibrator quant) model.language_model.layers.36.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2617 calibrator=MaxCalibrator quant) model.language_model.layers.36.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=83.5000 calibrator=MaxCalibrator quant) model.language_model.layers.36.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2695 calibrator=MaxCalibrator quant) model.language_model.layers.36.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=83.5000 calibrator=MaxCalibrator quant) model.language_model.layers.36.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2090 calibrator=MaxCalibrator quant) model.language_model.layers.36.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=83.5000 calibrator=MaxCalibrator quant) model.language_model.layers.36.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2256 calibrator=MaxCalibrator quant) model.language_model.layers.36.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.5000 calibrator=MaxCalibrator quant) model.language_model.layers.36.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3340 calibrator=MaxCalibrator quant) model.language_model.layers.36.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.5000 calibrator=MaxCalibrator quant) model.language_model.layers.36.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2314 calibrator=MaxCalibrator quant) model.language_model.layers.36.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=48.0000 calibrator=MaxCalibrator quant) model.language_model.layers.36.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.36.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2637 calibrator=MaxCalibrator quant) model.language_model.layers.37.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.37.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.37.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=40.2500 calibrator=MaxCalibrator quant) model.language_model.layers.37.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9609 calibrator=MaxCalibrator quant) model.language_model.layers.37.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.0000 calibrator=MaxCalibrator quant) model.language_model.layers.37.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4531 calibrator=MaxCalibrator quant) model.language_model.layers.37.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.0000 calibrator=MaxCalibrator quant) model.language_model.layers.37.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2617 calibrator=MaxCalibrator quant) model.language_model.layers.37.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.0000 calibrator=MaxCalibrator quant) model.language_model.layers.37.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1436 calibrator=MaxCalibrator quant) model.language_model.layers.37.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.0000 calibrator=MaxCalibrator quant) model.language_model.layers.37.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2178 calibrator=MaxCalibrator quant) model.language_model.layers.37.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.1250 calibrator=MaxCalibrator quant) model.language_model.layers.37.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2578 calibrator=MaxCalibrator quant) model.language_model.layers.37.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.1250 calibrator=MaxCalibrator quant) model.language_model.layers.37.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2158 calibrator=MaxCalibrator quant) model.language_model.layers.37.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=60.5000 calibrator=MaxCalibrator quant) model.language_model.layers.37.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.37.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3066 calibrator=MaxCalibrator quant) model.language_model.layers.38.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.38.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.38.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=48.0000 calibrator=MaxCalibrator quant) model.language_model.layers.38.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.0156 calibrator=MaxCalibrator quant) model.language_model.layers.38.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=60.0000 calibrator=MaxCalibrator quant) model.language_model.layers.38.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2891 calibrator=MaxCalibrator quant) model.language_model.layers.38.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=60.0000 calibrator=MaxCalibrator quant) model.language_model.layers.38.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2500 calibrator=MaxCalibrator quant) model.language_model.layers.38.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=60.0000 calibrator=MaxCalibrator quant) model.language_model.layers.38.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1348 calibrator=MaxCalibrator quant) model.language_model.layers.38.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=60.0000 calibrator=MaxCalibrator quant) model.language_model.layers.38.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2217 calibrator=MaxCalibrator quant) model.language_model.layers.38.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=36.5000 calibrator=MaxCalibrator quant) model.language_model.layers.38.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3242 calibrator=MaxCalibrator quant) model.language_model.layers.38.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=36.5000 calibrator=MaxCalibrator quant) model.language_model.layers.38.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1777 calibrator=MaxCalibrator quant) model.language_model.layers.38.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=55.2500 calibrator=MaxCalibrator quant) model.language_model.layers.38.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.38.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6992 calibrator=MaxCalibrator quant) model.language_model.layers.39.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.5000 calibrator=MaxCalibrator quant) model.language_model.layers.39.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3242 calibrator=MaxCalibrator quant) model.language_model.layers.39.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.5000 calibrator=MaxCalibrator quant) model.language_model.layers.39.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3145 calibrator=MaxCalibrator quant) model.language_model.layers.39.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.5000 calibrator=MaxCalibrator quant) model.language_model.layers.39.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2734 calibrator=MaxCalibrator quant) model.language_model.layers.39.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=7.6250 calibrator=MaxCalibrator quant) model.language_model.layers.39.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7539 calibrator=MaxCalibrator quant) model.language_model.layers.39.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.39.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=19.5000 calibrator=MaxCalibrator quant) model.language_model.layers.39.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=24.7500 calibrator=MaxCalibrator quant) model.language_model.layers.39.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.39.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.5000 calibrator=MaxCalibrator quant) model.language_model.layers.39.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.39.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2080 calibrator=MaxCalibrator quant) model.language_model.layers.39.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=38.5000 calibrator=MaxCalibrator quant) model.language_model.layers.39.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.39.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2061 calibrator=MaxCalibrator quant) model.language_model.layers.39.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=61.0000 calibrator=MaxCalibrator quant) model.language_model.layers.39.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.39.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5820 calibrator=MaxCalibrator quant) model.language_model.layers.40.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.40.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.40.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.0000 calibrator=MaxCalibrator quant) model.language_model.layers.40.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.8477 calibrator=MaxCalibrator quant) model.language_model.layers.40.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=76.0000 calibrator=MaxCalibrator quant) model.language_model.layers.40.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2676 calibrator=MaxCalibrator quant) model.language_model.layers.40.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=76.0000 calibrator=MaxCalibrator quant) model.language_model.layers.40.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4688 calibrator=MaxCalibrator quant) model.language_model.layers.40.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=76.0000 calibrator=MaxCalibrator quant) model.language_model.layers.40.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1816 calibrator=MaxCalibrator quant) model.language_model.layers.40.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=76.0000 calibrator=MaxCalibrator quant) model.language_model.layers.40.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2402 calibrator=MaxCalibrator quant) model.language_model.layers.40.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=40.7500 calibrator=MaxCalibrator quant) model.language_model.layers.40.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2695 calibrator=MaxCalibrator quant) model.language_model.layers.40.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=40.7500 calibrator=MaxCalibrator quant) model.language_model.layers.40.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1738 calibrator=MaxCalibrator quant) model.language_model.layers.40.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=105.5000 calibrator=MaxCalibrator quant) model.language_model.layers.40.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.40.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6758 calibrator=MaxCalibrator quant) model.language_model.layers.41.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.41.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.41.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.3750 calibrator=MaxCalibrator quant) model.language_model.layers.41.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6992 calibrator=MaxCalibrator quant) model.language_model.layers.41.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.41.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2402 calibrator=MaxCalibrator quant) model.language_model.layers.41.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.41.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3184 calibrator=MaxCalibrator quant) model.language_model.layers.41.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.41.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1768 calibrator=MaxCalibrator quant) model.language_model.layers.41.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.41.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1953 calibrator=MaxCalibrator quant) model.language_model.layers.41.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.2500 calibrator=MaxCalibrator quant) model.language_model.layers.41.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2539 calibrator=MaxCalibrator quant) model.language_model.layers.41.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.2500 calibrator=MaxCalibrator quant) model.language_model.layers.41.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1787 calibrator=MaxCalibrator quant) model.language_model.layers.41.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=76.5000 calibrator=MaxCalibrator quant) model.language_model.layers.41.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.41.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5586 calibrator=MaxCalibrator quant) model.language_model.layers.42.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.42.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.42.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=56.7500 calibrator=MaxCalibrator quant) model.language_model.layers.42.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7227 calibrator=MaxCalibrator quant) model.language_model.layers.42.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.0000 calibrator=MaxCalibrator quant) model.language_model.layers.42.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3379 calibrator=MaxCalibrator quant) model.language_model.layers.42.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.0000 calibrator=MaxCalibrator quant) model.language_model.layers.42.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3242 calibrator=MaxCalibrator quant) model.language_model.layers.42.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.0000 calibrator=MaxCalibrator quant) model.language_model.layers.42.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1719 calibrator=MaxCalibrator quant) model.language_model.layers.42.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=79.0000 calibrator=MaxCalibrator quant) model.language_model.layers.42.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2432 calibrator=MaxCalibrator quant) model.language_model.layers.42.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=55.7500 calibrator=MaxCalibrator quant) model.language_model.layers.42.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2793 calibrator=MaxCalibrator quant) model.language_model.layers.42.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=55.7500 calibrator=MaxCalibrator quant) model.language_model.layers.42.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3555 calibrator=MaxCalibrator quant) model.language_model.layers.42.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=90.5000 calibrator=MaxCalibrator quant) model.language_model.layers.42.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.42.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5742 calibrator=MaxCalibrator quant) model.language_model.layers.43.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=107.5000 calibrator=MaxCalibrator quant) model.language_model.layers.43.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3848 calibrator=MaxCalibrator quant) model.language_model.layers.43.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=107.5000 calibrator=MaxCalibrator quant) model.language_model.layers.43.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2891 calibrator=MaxCalibrator quant) model.language_model.layers.43.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=107.5000 calibrator=MaxCalibrator quant) model.language_model.layers.43.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3477 calibrator=MaxCalibrator quant) model.language_model.layers.43.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=13.6875 calibrator=MaxCalibrator quant) model.language_model.layers.43.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4551 calibrator=MaxCalibrator quant) model.language_model.layers.43.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.43.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=20.1250 calibrator=MaxCalibrator quant) model.language_model.layers.43.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=46.7500 calibrator=MaxCalibrator quant) model.language_model.layers.43.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.43.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=40.5000 calibrator=MaxCalibrator quant) model.language_model.layers.43.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.43.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2090 calibrator=MaxCalibrator quant) model.language_model.layers.43.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=40.5000 calibrator=MaxCalibrator quant) model.language_model.layers.43.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.43.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1631 calibrator=MaxCalibrator quant) model.language_model.layers.43.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.2500 calibrator=MaxCalibrator quant) model.language_model.layers.43.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.43.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6094 calibrator=MaxCalibrator quant) model.language_model.layers.44.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.44.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.44.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=21.8750 calibrator=MaxCalibrator quant) model.language_model.layers.44.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5938 calibrator=MaxCalibrator quant) model.language_model.layers.44.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=72.0000 calibrator=MaxCalibrator quant) model.language_model.layers.44.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2637 calibrator=MaxCalibrator quant) model.language_model.layers.44.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=72.0000 calibrator=MaxCalibrator quant) model.language_model.layers.44.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3301 calibrator=MaxCalibrator quant) model.language_model.layers.44.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=72.0000 calibrator=MaxCalibrator quant) model.language_model.layers.44.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1270 calibrator=MaxCalibrator quant) model.language_model.layers.44.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=72.0000 calibrator=MaxCalibrator quant) model.language_model.layers.44.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1982 calibrator=MaxCalibrator quant) model.language_model.layers.44.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.0000 calibrator=MaxCalibrator quant) model.language_model.layers.44.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2139 calibrator=MaxCalibrator quant) model.language_model.layers.44.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=39.0000 calibrator=MaxCalibrator quant) model.language_model.layers.44.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1777 calibrator=MaxCalibrator quant) model.language_model.layers.44.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=32.2500 calibrator=MaxCalibrator quant) model.language_model.layers.44.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.44.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4766 calibrator=MaxCalibrator quant) model.language_model.layers.45.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.45.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.45.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.7500 calibrator=MaxCalibrator quant) model.language_model.layers.45.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7812 calibrator=MaxCalibrator quant) model.language_model.layers.45.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=68.5000 calibrator=MaxCalibrator quant) model.language_model.layers.45.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2402 calibrator=MaxCalibrator quant) model.language_model.layers.45.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=68.5000 calibrator=MaxCalibrator quant) model.language_model.layers.45.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2256 calibrator=MaxCalibrator quant) model.language_model.layers.45.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=68.5000 calibrator=MaxCalibrator quant) model.language_model.layers.45.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2129 calibrator=MaxCalibrator quant) model.language_model.layers.45.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=68.5000 calibrator=MaxCalibrator quant) model.language_model.layers.45.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1992 calibrator=MaxCalibrator quant) model.language_model.layers.45.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.0000 calibrator=MaxCalibrator quant) model.language_model.layers.45.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2578 calibrator=MaxCalibrator quant) model.language_model.layers.45.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=37.0000 calibrator=MaxCalibrator quant) model.language_model.layers.45.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2393 calibrator=MaxCalibrator quant) model.language_model.layers.45.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=40.5000 calibrator=MaxCalibrator quant) model.language_model.layers.45.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.45.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3906 calibrator=MaxCalibrator quant) model.language_model.layers.46.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.46.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.46.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.2500 calibrator=MaxCalibrator quant) model.language_model.layers.46.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7383 calibrator=MaxCalibrator quant) model.language_model.layers.46.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.0000 calibrator=MaxCalibrator quant) model.language_model.layers.46.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2559 calibrator=MaxCalibrator quant) model.language_model.layers.46.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.0000 calibrator=MaxCalibrator quant) model.language_model.layers.46.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3887 calibrator=MaxCalibrator quant) model.language_model.layers.46.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.0000 calibrator=MaxCalibrator quant) model.language_model.layers.46.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1729 calibrator=MaxCalibrator quant) model.language_model.layers.46.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.0000 calibrator=MaxCalibrator quant) model.language_model.layers.46.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1943 calibrator=MaxCalibrator quant) model.language_model.layers.46.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.5000 calibrator=MaxCalibrator quant) model.language_model.layers.46.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1992 calibrator=MaxCalibrator quant) model.language_model.layers.46.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.5000 calibrator=MaxCalibrator quant) model.language_model.layers.46.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3320 calibrator=MaxCalibrator quant) model.language_model.layers.46.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=30.1250 calibrator=MaxCalibrator quant) model.language_model.layers.46.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.46.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6211 calibrator=MaxCalibrator quant) model.language_model.layers.47.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=98.0000 calibrator=MaxCalibrator quant) model.language_model.layers.47.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3457 calibrator=MaxCalibrator quant) model.language_model.layers.47.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=98.0000 calibrator=MaxCalibrator quant) model.language_model.layers.47.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2969 calibrator=MaxCalibrator quant) model.language_model.layers.47.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=98.0000 calibrator=MaxCalibrator quant) model.language_model.layers.47.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3828 calibrator=MaxCalibrator quant) model.language_model.layers.47.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=9.6250 calibrator=MaxCalibrator quant) model.language_model.layers.47.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6211 calibrator=MaxCalibrator quant) model.language_model.layers.47.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.47.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=18.3750 calibrator=MaxCalibrator quant) model.language_model.layers.47.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=46.7500 calibrator=MaxCalibrator quant) model.language_model.layers.47.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.47.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=53.0000 calibrator=MaxCalibrator quant) model.language_model.layers.47.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.47.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2090 calibrator=MaxCalibrator quant) model.language_model.layers.47.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=53.0000 calibrator=MaxCalibrator quant) model.language_model.layers.47.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.47.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3008 calibrator=MaxCalibrator quant) model.language_model.layers.47.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=35.0000 calibrator=MaxCalibrator quant) model.language_model.layers.47.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.47.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6250 calibrator=MaxCalibrator quant) model.language_model.layers.48.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.48.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.48.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=36.0000 calibrator=MaxCalibrator quant) model.language_model.layers.48.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7773 calibrator=MaxCalibrator quant) model.language_model.layers.48.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.48.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2852 calibrator=MaxCalibrator quant) model.language_model.layers.48.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.48.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2773 calibrator=MaxCalibrator quant) model.language_model.layers.48.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.48.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1006 calibrator=MaxCalibrator quant) model.language_model.layers.48.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.48.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1875 calibrator=MaxCalibrator quant) model.language_model.layers.48.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=44.2500 calibrator=MaxCalibrator quant) model.language_model.layers.48.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3730 calibrator=MaxCalibrator quant) model.language_model.layers.48.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=44.2500 calibrator=MaxCalibrator quant) model.language_model.layers.48.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2734 calibrator=MaxCalibrator quant) model.language_model.layers.48.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=52.0000 calibrator=MaxCalibrator quant) model.language_model.layers.48.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.48.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5977 calibrator=MaxCalibrator quant) model.language_model.layers.49.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.49.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.49.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.5000 calibrator=MaxCalibrator quant) model.language_model.layers.49.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7461 calibrator=MaxCalibrator quant) model.language_model.layers.49.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=68.5000 calibrator=MaxCalibrator quant) model.language_model.layers.49.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2852 calibrator=MaxCalibrator quant) model.language_model.layers.49.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=68.5000 calibrator=MaxCalibrator quant) model.language_model.layers.49.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2734 calibrator=MaxCalibrator quant) model.language_model.layers.49.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=68.5000 calibrator=MaxCalibrator quant) model.language_model.layers.49.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1318 calibrator=MaxCalibrator quant) model.language_model.layers.49.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=68.5000 calibrator=MaxCalibrator quant) model.language_model.layers.49.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1855 calibrator=MaxCalibrator quant) model.language_model.layers.49.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.5000 calibrator=MaxCalibrator quant) model.language_model.layers.49.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3633 calibrator=MaxCalibrator quant) model.language_model.layers.49.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.5000 calibrator=MaxCalibrator quant) model.language_model.layers.49.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2461 calibrator=MaxCalibrator quant) model.language_model.layers.49.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=44.5000 calibrator=MaxCalibrator quant) model.language_model.layers.49.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.49.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6367 calibrator=MaxCalibrator quant) model.language_model.layers.50.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.50.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.50.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.7500 calibrator=MaxCalibrator quant) model.language_model.layers.50.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9336 calibrator=MaxCalibrator quant) model.language_model.layers.50.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=65.5000 calibrator=MaxCalibrator quant) model.language_model.layers.50.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3477 calibrator=MaxCalibrator quant) model.language_model.layers.50.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=65.5000 calibrator=MaxCalibrator quant) model.language_model.layers.50.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4180 calibrator=MaxCalibrator quant) model.language_model.layers.50.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=65.5000 calibrator=MaxCalibrator quant) model.language_model.layers.50.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1416 calibrator=MaxCalibrator quant) model.language_model.layers.50.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=65.5000 calibrator=MaxCalibrator quant) model.language_model.layers.50.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1963 calibrator=MaxCalibrator quant) model.language_model.layers.50.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=54.0000 calibrator=MaxCalibrator quant) model.language_model.layers.50.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2949 calibrator=MaxCalibrator quant) model.language_model.layers.50.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=54.0000 calibrator=MaxCalibrator quant) model.language_model.layers.50.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2314 calibrator=MaxCalibrator quant) model.language_model.layers.50.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=69.5000 calibrator=MaxCalibrator quant) model.language_model.layers.50.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.50.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5820 calibrator=MaxCalibrator quant) model.language_model.layers.51.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.51.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3125 calibrator=MaxCalibrator quant) model.language_model.layers.51.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.51.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2578 calibrator=MaxCalibrator quant) model.language_model.layers.51.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=73.5000 calibrator=MaxCalibrator quant) model.language_model.layers.51.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3770 calibrator=MaxCalibrator quant) model.language_model.layers.51.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=27.1250 calibrator=MaxCalibrator quant) model.language_model.layers.51.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.1875 calibrator=MaxCalibrator quant) model.language_model.layers.51.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.51.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=18.0000 calibrator=MaxCalibrator quant) model.language_model.layers.51.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=43.0000 calibrator=MaxCalibrator quant) model.language_model.layers.51.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.51.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.2500 calibrator=MaxCalibrator quant) model.language_model.layers.51.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.51.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2930 calibrator=MaxCalibrator quant) model.language_model.layers.51.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.2500 calibrator=MaxCalibrator quant) model.language_model.layers.51.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.51.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2754 calibrator=MaxCalibrator quant) model.language_model.layers.51.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=86.0000 calibrator=MaxCalibrator quant) model.language_model.layers.51.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.51.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7383 calibrator=MaxCalibrator quant) model.language_model.layers.52.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.52.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.52.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=74.5000 calibrator=MaxCalibrator quant) model.language_model.layers.52.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.8789 calibrator=MaxCalibrator quant) model.language_model.layers.52.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=62.5000 calibrator=MaxCalibrator quant) model.language_model.layers.52.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2812 calibrator=MaxCalibrator quant) model.language_model.layers.52.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=62.5000 calibrator=MaxCalibrator quant) model.language_model.layers.52.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5039 calibrator=MaxCalibrator quant) model.language_model.layers.52.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=62.5000 calibrator=MaxCalibrator quant) model.language_model.layers.52.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1338 calibrator=MaxCalibrator quant) model.language_model.layers.52.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=62.5000 calibrator=MaxCalibrator quant) model.language_model.layers.52.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2734 calibrator=MaxCalibrator quant) model.language_model.layers.52.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.7500 calibrator=MaxCalibrator quant) model.language_model.layers.52.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3516 calibrator=MaxCalibrator quant) model.language_model.layers.52.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=41.7500 calibrator=MaxCalibrator quant) model.language_model.layers.52.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2373 calibrator=MaxCalibrator quant) model.language_model.layers.52.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=111.0000 calibrator=MaxCalibrator quant) model.language_model.layers.52.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.52.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7891 calibrator=MaxCalibrator quant) model.language_model.layers.53.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.53.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.53.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=123.5000 calibrator=MaxCalibrator quant) model.language_model.layers.53.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.8828 calibrator=MaxCalibrator quant) model.language_model.layers.53.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=54.0000 calibrator=MaxCalibrator quant) model.language_model.layers.53.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3145 calibrator=MaxCalibrator quant) model.language_model.layers.53.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=54.0000 calibrator=MaxCalibrator quant) model.language_model.layers.53.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2617 calibrator=MaxCalibrator quant) model.language_model.layers.53.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=54.0000 calibrator=MaxCalibrator quant) model.language_model.layers.53.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1387 calibrator=MaxCalibrator quant) model.language_model.layers.53.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=54.0000 calibrator=MaxCalibrator quant) model.language_model.layers.53.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2715 calibrator=MaxCalibrator quant) model.language_model.layers.53.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.7500 calibrator=MaxCalibrator quant) model.language_model.layers.53.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3652 calibrator=MaxCalibrator quant) model.language_model.layers.53.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=34.7500 calibrator=MaxCalibrator quant) model.language_model.layers.53.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2812 calibrator=MaxCalibrator quant) model.language_model.layers.53.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=110.5000 calibrator=MaxCalibrator quant) model.language_model.layers.53.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.53.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5977 calibrator=MaxCalibrator quant) model.language_model.layers.54.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.54.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.54.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=93.5000 calibrator=MaxCalibrator quant) model.language_model.layers.54.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9688 calibrator=MaxCalibrator quant) model.language_model.layers.54.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=49.7500 calibrator=MaxCalibrator quant) model.language_model.layers.54.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3105 calibrator=MaxCalibrator quant) model.language_model.layers.54.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=49.7500 calibrator=MaxCalibrator quant) model.language_model.layers.54.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2734 calibrator=MaxCalibrator quant) model.language_model.layers.54.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=49.7500 calibrator=MaxCalibrator quant) model.language_model.layers.54.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1162 calibrator=MaxCalibrator quant) model.language_model.layers.54.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=49.7500 calibrator=MaxCalibrator quant) model.language_model.layers.54.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2559 calibrator=MaxCalibrator quant) model.language_model.layers.54.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.7500 calibrator=MaxCalibrator quant) model.language_model.layers.54.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3809 calibrator=MaxCalibrator quant) model.language_model.layers.54.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.7500 calibrator=MaxCalibrator quant) model.language_model.layers.54.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2012 calibrator=MaxCalibrator quant) model.language_model.layers.54.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=616.0000 calibrator=MaxCalibrator quant) model.language_model.layers.54.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.54.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7109 calibrator=MaxCalibrator quant) model.language_model.layers.55.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=57.5000 calibrator=MaxCalibrator quant) model.language_model.layers.55.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2578 calibrator=MaxCalibrator quant) model.language_model.layers.55.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=57.5000 calibrator=MaxCalibrator quant) model.language_model.layers.55.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2852 calibrator=MaxCalibrator quant) model.language_model.layers.55.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=57.5000 calibrator=MaxCalibrator quant) model.language_model.layers.55.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3359 calibrator=MaxCalibrator quant) model.language_model.layers.55.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=28.5000 calibrator=MaxCalibrator quant) model.language_model.layers.55.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.3047 calibrator=MaxCalibrator quant) model.language_model.layers.55.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.55.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=15.8125 calibrator=MaxCalibrator quant) model.language_model.layers.55.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=50.7500 calibrator=MaxCalibrator quant) model.language_model.layers.55.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.55.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.3750 calibrator=MaxCalibrator quant) model.language_model.layers.55.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.55.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3516 calibrator=MaxCalibrator quant) model.language_model.layers.55.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=29.3750 calibrator=MaxCalibrator quant) model.language_model.layers.55.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.55.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2715 calibrator=MaxCalibrator quant) model.language_model.layers.55.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=147.0000 calibrator=MaxCalibrator quant) model.language_model.layers.55.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.55.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7422 calibrator=MaxCalibrator quant) model.language_model.layers.56.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.56.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.56.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=154.0000 calibrator=MaxCalibrator quant) model.language_model.layers.56.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.0312 calibrator=MaxCalibrator quant) model.language_model.layers.56.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=48.0000 calibrator=MaxCalibrator quant) model.language_model.layers.56.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3438 calibrator=MaxCalibrator quant) model.language_model.layers.56.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=48.0000 calibrator=MaxCalibrator quant) model.language_model.layers.56.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2275 calibrator=MaxCalibrator quant) model.language_model.layers.56.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=48.0000 calibrator=MaxCalibrator quant) model.language_model.layers.56.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1484 calibrator=MaxCalibrator quant) model.language_model.layers.56.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=48.0000 calibrator=MaxCalibrator quant) model.language_model.layers.56.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2949 calibrator=MaxCalibrator quant) model.language_model.layers.56.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=30.7500 calibrator=MaxCalibrator quant) model.language_model.layers.56.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3809 calibrator=MaxCalibrator quant) model.language_model.layers.56.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=30.7500 calibrator=MaxCalibrator quant) model.language_model.layers.56.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1963 calibrator=MaxCalibrator quant) model.language_model.layers.56.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=148.0000 calibrator=MaxCalibrator quant) model.language_model.layers.56.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.56.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7383 calibrator=MaxCalibrator quant) model.language_model.layers.57.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.57.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.57.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=123.5000 calibrator=MaxCalibrator quant) model.language_model.layers.57.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9961 calibrator=MaxCalibrator quant) model.language_model.layers.57.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=48.0000 calibrator=MaxCalibrator quant) model.language_model.layers.57.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3184 calibrator=MaxCalibrator quant) model.language_model.layers.57.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=48.0000 calibrator=MaxCalibrator quant) model.language_model.layers.57.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1875 calibrator=MaxCalibrator quant) model.language_model.layers.57.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=48.0000 calibrator=MaxCalibrator quant) model.language_model.layers.57.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1260 calibrator=MaxCalibrator quant) model.language_model.layers.57.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=48.0000 calibrator=MaxCalibrator quant) model.language_model.layers.57.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3047 calibrator=MaxCalibrator quant) model.language_model.layers.57.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=35.7500 calibrator=MaxCalibrator quant) model.language_model.layers.57.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5078 calibrator=MaxCalibrator quant) model.language_model.layers.57.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=35.7500 calibrator=MaxCalibrator quant) model.language_model.layers.57.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2021 calibrator=MaxCalibrator quant) model.language_model.layers.57.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=147.0000 calibrator=MaxCalibrator quant) model.language_model.layers.57.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.57.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6719 calibrator=MaxCalibrator quant) model.language_model.layers.58.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.58.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.58.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=142.0000 calibrator=MaxCalibrator quant) model.language_model.layers.58.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.1719 calibrator=MaxCalibrator quant) model.language_model.layers.58.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=46.7500 calibrator=MaxCalibrator quant) model.language_model.layers.58.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3008 calibrator=MaxCalibrator quant) model.language_model.layers.58.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=46.7500 calibrator=MaxCalibrator quant) model.language_model.layers.58.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2080 calibrator=MaxCalibrator quant) model.language_model.layers.58.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=46.7500 calibrator=MaxCalibrator quant) model.language_model.layers.58.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1396 calibrator=MaxCalibrator quant) model.language_model.layers.58.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=46.7500 calibrator=MaxCalibrator quant) model.language_model.layers.58.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3418 calibrator=MaxCalibrator quant) model.language_model.layers.58.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.0000 calibrator=MaxCalibrator quant) model.language_model.layers.58.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4531 calibrator=MaxCalibrator quant) model.language_model.layers.58.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=43.0000 calibrator=MaxCalibrator quant) model.language_model.layers.58.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2207 calibrator=MaxCalibrator quant) model.language_model.layers.58.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=470.0000 calibrator=MaxCalibrator quant) model.language_model.layers.58.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.58.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7344 calibrator=MaxCalibrator quant) model.language_model.layers.59.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=65.0000 calibrator=MaxCalibrator quant) model.language_model.layers.59.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3789 calibrator=MaxCalibrator quant) model.language_model.layers.59.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=65.0000 calibrator=MaxCalibrator quant) model.language_model.layers.59.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2490 calibrator=MaxCalibrator quant) model.language_model.layers.59.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=65.0000 calibrator=MaxCalibrator quant) model.language_model.layers.59.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9414 calibrator=MaxCalibrator quant) model.language_model.layers.59.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=91.5000 calibrator=MaxCalibrator quant) model.language_model.layers.59.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.1016 calibrator=MaxCalibrator quant) model.language_model.layers.59.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.59.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=15.3750 calibrator=MaxCalibrator quant) model.language_model.layers.59.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=107.0000 calibrator=MaxCalibrator quant) model.language_model.layers.59.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.59.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.7500 calibrator=MaxCalibrator quant) model.language_model.layers.59.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.59.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3516 calibrator=MaxCalibrator quant) model.language_model.layers.59.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=45.7500 calibrator=MaxCalibrator quant) model.language_model.layers.59.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.59.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3652 calibrator=MaxCalibrator quant) model.language_model.layers.59.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=604.0000 calibrator=MaxCalibrator quant) model.language_model.layers.59.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.59.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.8320 calibrator=MaxCalibrator quant) model.language_model.layers.60.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.60.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.60.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.60.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=170.0000 calibrator=MaxCalibrator quant) model.language_model.layers.60.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.60.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7969 calibrator=MaxCalibrator quant) model.language_model.layers.60.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.5000 calibrator=MaxCalibrator quant) model.language_model.layers.60.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.60.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5391 calibrator=MaxCalibrator quant) model.language_model.layers.60.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.5000 calibrator=MaxCalibrator quant) model.language_model.layers.60.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.60.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3066 calibrator=MaxCalibrator quant) model.language_model.layers.60.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.5000 calibrator=MaxCalibrator quant) model.language_model.layers.60.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.60.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1226 calibrator=MaxCalibrator quant) model.language_model.layers.60.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.5000 calibrator=MaxCalibrator quant) model.language_model.layers.60.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.60.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3086 calibrator=MaxCalibrator quant) model.language_model.layers.60.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=47.7500 calibrator=MaxCalibrator quant) model.language_model.layers.60.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.60.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4219 calibrator=MaxCalibrator quant) model.language_model.layers.60.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=47.7500 calibrator=MaxCalibrator quant) model.language_model.layers.60.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.60.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2695 calibrator=MaxCalibrator quant) model.language_model.layers.60.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=249.0000 calibrator=MaxCalibrator quant) model.language_model.layers.60.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.60.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.1719 calibrator=MaxCalibrator quant) model.language_model.layers.61.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.61.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.61.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.61.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=132.0000 calibrator=MaxCalibrator quant) model.language_model.layers.61.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.61.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.8164 calibrator=MaxCalibrator quant) model.language_model.layers.61.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.0000 calibrator=MaxCalibrator quant) model.language_model.layers.61.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.61.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4004 calibrator=MaxCalibrator quant) model.language_model.layers.61.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.0000 calibrator=MaxCalibrator quant) model.language_model.layers.61.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.61.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2812 calibrator=MaxCalibrator quant) model.language_model.layers.61.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.0000 calibrator=MaxCalibrator quant) model.language_model.layers.61.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.61.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1260 calibrator=MaxCalibrator quant) model.language_model.layers.61.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=42.0000 calibrator=MaxCalibrator quant) model.language_model.layers.61.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.61.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3320 calibrator=MaxCalibrator quant) model.language_model.layers.61.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.7500 calibrator=MaxCalibrator quant) model.language_model.layers.61.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.61.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4766 calibrator=MaxCalibrator quant) model.language_model.layers.61.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=51.7500 calibrator=MaxCalibrator quant) model.language_model.layers.61.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.61.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3477 calibrator=MaxCalibrator quant) model.language_model.layers.61.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=254.0000 calibrator=MaxCalibrator quant) model.language_model.layers.61.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.61.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4922 calibrator=MaxCalibrator quant) model.language_model.layers.62.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) model.language_model.layers.62.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) model.language_model.layers.62.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) model.language_model.layers.62.linear_attn.out_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=218.0000 calibrator=MaxCalibrator quant) model.language_model.layers.62.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.62.linear_attn.out_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.9766 calibrator=MaxCalibrator quant) model.language_model.layers.62.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=56.2500 calibrator=MaxCalibrator quant) model.language_model.layers.62.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) model.language_model.layers.62.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4961 calibrator=MaxCalibrator quant) model.language_model.layers.62.linear_attn.in_proj_z.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=56.2500 calibrator=MaxCalibrator quant) model.language_model.layers.62.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) model.language_model.layers.62.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3672 calibrator=MaxCalibrator quant) model.language_model.layers.62.linear_attn.in_proj_b.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=56.2500 calibrator=MaxCalibrator quant) model.language_model.layers.62.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) model.language_model.layers.62.linear_attn.in_proj_b.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.1226 calibrator=MaxCalibrator quant) model.language_model.layers.62.linear_attn.in_proj_a.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=56.2500 calibrator=MaxCalibrator quant) model.language_model.layers.62.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) model.language_model.layers.62.linear_attn.in_proj_a.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3008 calibrator=MaxCalibrator quant) model.language_model.layers.62.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=54.0000 calibrator=MaxCalibrator quant) model.language_model.layers.62.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.62.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.4180 calibrator=MaxCalibrator quant) model.language_model.layers.62.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=54.0000 calibrator=MaxCalibrator quant) model.language_model.layers.62.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.62.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2949 calibrator=MaxCalibrator quant) model.language_model.layers.62.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=376.0000 calibrator=MaxCalibrator quant) model.language_model.layers.62.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.62.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.6914 calibrator=MaxCalibrator quant) model.language_model.layers.63.self_attn.q_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=56.7500 calibrator=MaxCalibrator quant) model.language_model.layers.63.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.63.self_attn.q_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5195 calibrator=MaxCalibrator quant) model.language_model.layers.63.self_attn.k_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=56.7500 calibrator=MaxCalibrator quant) model.language_model.layers.63.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.63.self_attn.k_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2617 calibrator=MaxCalibrator quant) model.language_model.layers.63.self_attn.v_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=56.7500 calibrator=MaxCalibrator quant) model.language_model.layers.63.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.63.self_attn.v_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=1.0156 calibrator=MaxCalibrator quant) model.language_model.layers.63.self_attn.o_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=133.0000 calibrator=MaxCalibrator quant) model.language_model.layers.63.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.63.self_attn.o_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.7656 calibrator=MaxCalibrator quant) model.language_model.layers.63.self_attn.q_bmm_quantizer TensorQuantizer(disabled) model.language_model.layers.63.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=12.5625 calibrator=MaxCalibrator quant) model.language_model.layers.63.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=143.0000 calibrator=MaxCalibrator quant) model.language_model.layers.63.self_attn.softmax_quantizer TensorQuantizer(disabled) model.language_model.layers.63.mlp.gate_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=56.5000 calibrator=MaxCalibrator quant) model.language_model.layers.63.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.63.mlp.gate_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.3555 calibrator=MaxCalibrator quant) model.language_model.layers.63.mlp.up_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=56.5000 calibrator=MaxCalibrator quant) model.language_model.layers.63.mlp.up_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.63.mlp.up_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.2930 calibrator=MaxCalibrator quant) model.language_model.layers.63.mlp.down_proj.input_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=486.0000 calibrator=MaxCalibrator quant) model.language_model.layers.63.mlp.down_proj.output_quantizer TensorQuantizer(disabled) model.language_model.layers.63.mlp.down_proj.weight_quantizer TensorQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'dynamic', 'scale_bits': (4, 3)}, amax=0.5195 calibrator=MaxCalibrator quant) lm_head.input_quantizer TensorQuantizer(disabled) lm_head.output_quantizer TensorQuantizer(disabled) lm_head.weight_quantizer TensorQuantizer(disabled) 2140 TensorQuantizers found in model