diff --git a/config.yaml b/config.yaml
deleted file mode 100644
index 7679a00..0000000
--- a/config.yaml
+++ /dev/null
@@ -1,82 +0,0 @@
-wave_file_save_path: "./output"
-
-id: 
-  version: "v1"
-  name: "default"
-  root: "/mnt/fast/nobackup/users/hl01486/projects/general_audio_generation/AudioLDM-python/config/default/latent_diffusion.yaml"
-
-model:
-  device: "cuda"
-  reload_from_ckpt: "/mnt/fast/nobackup/scratch4weeks/hl01486/exps/audio_generation/stablediffusion/LDM/audioverse/2023_01_14_full_F4_B_spatial_v2_v1/checkpoints/last.ckpt"
-  target: audioldm.pipline.LatentDiffusion
-  params:
-    base_learning_rate: 5.0e-6
-    linear_start: 0.0015
-    linear_end: 0.0195
-    num_timesteps_cond: 1
-    log_every_t: 200
-    timesteps: 1000
-    first_stage_key: fbank
-    cond_stage_key: waveform
-    latent_t_size: 256 # TODO might need to change
-    latent_f_size: 16
-    channels: 8 # TODO might need to change
-    cond_stage_trainable: true
-    conditioning_key: film
-    monitor: val/loss_simple_ema
-    scale_by_std: true
-    unet_config:
-      target: audioldm.latent_diffusion.openaimodel.UNetModel
-      params:
-        image_size: 64 # TODO here
-        extra_film_condition_dim: 512
-        extra_film_use_concat: true
-        in_channels: 8 # TODO might need to change
-        out_channels: 8 # TODO might need to change
-        model_channels: 128 # TODO might need to change
-        attention_resolutions:
-        - 8
-        - 4
-        - 2
-        num_res_blocks: 2
-        channel_mult:
-        - 1
-        - 2
-        - 3
-        - 5
-        num_head_channels: 32
-        use_spatial_transformer: true
-
-    first_stage_config:
-      base_learning_rate: 4.5e-05
-      target: audioldm.variational_autoencoder.autoencoder.AutoencoderKL
-      params:
-        monitor: val/rec_loss
-        image_key: fbank
-        subband: 1
-        embed_dim: 8
-        time_shuffle: 1
-        ddconfig:
-          double_z: true
-          z_channels: 8
-          resolution: 256
-          downsample_time: false
-          in_channels: 1
-          out_ch: 1
-          ch: 128
-          ch_mult:
-          - 1
-          - 2
-          - 4
-          num_res_blocks: 2
-          attn_resolutions: []
-          dropout: 0.0
-
-    cond_stage_config:
-      target: audioldm.clap.encoders.CLAPAudioEmbeddingClassifierFreev2
-      params:
-        # pretrained_path: /mnt/fast/nobackup/users/hl01486/projects/contrastive_pretraining/CLAP/assets/checkpoints/epoch_top_0_audioset_no_fusion.pt
-        key: waveform
-        sampling_rate: 16000
-        embed_mode: audio
-        unconditional_prob: 0.1
\ No newline at end of file