# Hyperparameters for a unit-based HiFi-GAN generator (HyperPyYAML).
#
# Conventions used below:
#   * `!ref <key>` is replaced by the value of the top-level key `key`.
#   * `!new:<dotted.path>` instantiates that class, passing the nested mapping
#     as keyword arguments.

# ---------------------------------------------------------------------------
# Discrete-unit embedding
# ---------------------------------------------------------------------------
vocab_size: 1001
embedding_dim: 1024

# ---------------------------------------------------------------------------
# Generator architecture
# ---------------------------------------------------------------------------
# NOTE(review): 1216 = embedding_dim (1024) + 192. Presumably the extra 192
# channels are a speaker embedding concatenated to the unit embedding because
# `multi_speaker` is True below -- confirm against the speaker encoder used.
in_channels: 1216
out_channels: 1
# Kept as a quoted string: the value is the string "1", not the integer 1.
resblock_type: "1"
# One dilation list per entry in `resblock_kernel_sizes` (3 lists, 3 kernels).
resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
resblock_kernel_sizes: [3, 7, 11]
# One kernel size per entry in `upsample_factors` (5 and 5).
upsample_kernel_sizes: [11, 8, 8, 4, 4]
upsample_initial_channel: 512
# The product of these factors is 5 * 4 * 4 * 2 * 2 = 320.
upsample_factors: [5, 4, 4, 2, 2]
inference_padding: 5
cond_channels: 0
conv_post_bias: True
# NOTE(review): the var_pred_* values are still passed to the generator even
# though `duration_predictor` is False; presumably they only take effect when
# it is enabled -- confirm before removing them.
var_pred_hidden_dim: 128
var_pred_kernel_size: 3
var_pred_dropout: 0.5

# ---------------------------------------------------------------------------
# Generator instance
# ---------------------------------------------------------------------------
# Every argument except `duration_predictor` and `multi_speaker` is forwarded
# from the top-level keys above, so tune values there rather than here.
generator: !new:speechbrain.lobes.models.HifiGAN.UnitHifiganGenerator
    in_channels: !ref <in_channels>
    out_channels: !ref <out_channels>
    resblock_type: !ref <resblock_type>
    resblock_dilation_sizes: !ref <resblock_dilation_sizes>
    resblock_kernel_sizes: !ref <resblock_kernel_sizes>
    upsample_kernel_sizes: !ref <upsample_kernel_sizes>
    upsample_initial_channel: !ref <upsample_initial_channel>
    upsample_factors: !ref <upsample_factors>
    inference_padding: !ref <inference_padding>
    cond_channels: !ref <cond_channels>
    conv_post_bias: !ref <conv_post_bias>
    vocab_size: !ref <vocab_size>
    embedding_dim: !ref <embedding_dim>
    duration_predictor: False
    multi_speaker: True
    var_pred_hidden_dim: !ref <var_pred_hidden_dim>
    var_pred_kernel_size: !ref <var_pred_kernel_size>
    var_pred_dropout: !ref <var_pred_dropout>

# ---------------------------------------------------------------------------
# Modules exposed to the consumer of this file
# ---------------------------------------------------------------------------
# `!ref <generator>` points at the object built above; no second instance is
# created here.
modules:
    generator: !ref <generator>

# ---------------------------------------------------------------------------
# Pretrained-parameter loading
# ---------------------------------------------------------------------------
# `loadables` maps a name to the object that should receive loaded parameters;
# only the generator is registered.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        generator: !ref <generator>