Add files via upload

add Encoding fusion files
skelemoa · Feb 1, 2022 · 49a542e · 49a542e
1 parent dae5803
commit 49a542e
Show file tree

Hide file tree

Showing 2 changed files with 109 additions and 0 deletions.
diff --git a/gtad_fusion.py b/gtad_fusion.py
@@ -0,0 +1,56 @@
+import h5py
+import torch
+import argparse
+import numpy as np
+from tqdm import tqdm
+
+# Fuse per-video RGB and audio features from two HDF5 files into combined_val.h5.
+parser = argparse.ArgumentParser()
+# `choices` makes argparse reject anything but 0/1/2 with a usage error; an assert
+# is stripped under `python -O` and would accept "00", which matches no fusion type.
+parser.add_argument('--type', type=str, default='0', choices=['0', '1', '2'],
+	help='fusion type')  # 0: AvgTrim 1: DupTrim 2: Concat
+
+
+def avg_trim(videoFeats, audioFeats):
+	"""Mean-pool audio rows in windows of k = audio rows // video rows, trim, concat."""
+	# Clamp to 1: when audio has fewer rows than video the quotient is 0, and a
+	# zero-width window would average to NaN; max(1, ...) also guards division by 0.
+	k = max(1, audioFeats.shape[0] // max(1, videoFeats.shape[0]))
+	# Windows advance by k so pooled row t covers audio rows [t*k, (t+1)*k) and stays
+	# aligned with video row t; a trailing partial window would be trimmed, so skip it.
+	commonSize = min(videoFeats.shape[0], audioFeats.shape[0] // k)
+	pooled = audioFeats[: commonSize * k].reshape(commonSize, k, audioFeats.shape[1]).mean(dim=1)
+	return torch.cat([videoFeats[: commonSize], pooled], dim=1)
+
+
+def dup_trim(videoFeats, audioFeats):
+	"""Repeat each row of the shorter stream k times, trim both to equal length, concat."""
+	# max(1, ...) keeps the integer division defined when a stream has no rows.
+	if audioFeats.shape[0] > videoFeats.shape[0]:
+		k = audioFeats.shape[0] // max(1, videoFeats.shape[0])
+		videoFeats = videoFeats.repeat_interleave(k, dim=0)
+	else:
+		k = videoFeats.shape[0] // max(1, audioFeats.shape[0])
+		audioFeats = audioFeats.repeat_interleave(k, dim=0)
+	commonSize = min(audioFeats.shape[0], videoFeats.shape[0])
+	return torch.cat([videoFeats[: commonSize], audioFeats[: commonSize]], dim=1)
+
+
+def concat(videoFeats, audioFeats):
+	"""Concatenate along the feature axis without resizing; row counts must already match."""
+	return torch.cat([videoFeats, audioFeats], dim=1)
+
+
+FUSIONS = {'0': avg_trim, '1': dup_trim, '2': concat}
+
+if __name__ == "__main__":
+	opt = parser.parse_args()
+	fuse = FUSIONS[opt.type]
+	with h5py.File("combined_val.h5", "w") as writeHere, \
+			h5py.File('rgb_val.h5', "r") as rgbF, \
+			h5py.File('audio_val.h5', "r") as AudioF:
+		for i in tqdm(rgbF.keys()):
+			videoFeats = torch.tensor(np.array(rgbF[i]))
+			audioFeats = torch.tensor(np.array(AudioF[i]))
+			writeHere.create_dataset(i, data=fuse(videoFeats, audioFeats).detach().cpu().numpy())
diff --git a/muses_fusion.py b/muses_fusion.py
@@ -0,0 +1,53 @@
+import torch
+import argparse
+from glob import glob
+from tqdm import tqdm
+
+# Fuse each I3D_RGB/* feature tensor with its AudioFeats/* counterpart.
+parser = argparse.ArgumentParser()
+# `choices` makes argparse reject anything but 0/1/2 with a usage error; an assert
+# is stripped under `python -O` and would accept "00", which matches no fusion type.
+parser.add_argument('--type', type=str, default='0', choices=['0', '1', '2'],
+	help='fusion type')  # 0: AvgTrim 1: DupTrim 2: Concat
+
+
+def avg_trim(videoFeats, audioFeats):
+	"""Mean-pool audio rows in windows of k = audio rows // video rows, trim, concat."""
+	# Clamp to 1: when audio has fewer rows than video the quotient is 0, and a
+	# zero-width window would average to NaN; max(1, ...) also guards division by 0.
+	k = max(1, audioFeats.shape[0] // max(1, videoFeats.shape[0]))
+	# Windows advance by k so pooled row t covers audio rows [t*k, (t+1)*k) and stays
+	# aligned with video row t; a trailing partial window would be trimmed, so skip it.
+	commonSize = min(videoFeats.shape[0], audioFeats.shape[0] // k)
+	pooled = audioFeats[: commonSize * k].reshape(commonSize, k, audioFeats.shape[1]).mean(dim=1)
+	return torch.cat([videoFeats[: commonSize], pooled], dim=1)
+
+
+def dup_trim(videoFeats, audioFeats):
+	"""Repeat each row of the shorter stream k times, trim both to equal length, concat."""
+	# max(1, ...) keeps the integer division defined when a stream has no rows.
+	if audioFeats.shape[0] > videoFeats.shape[0]:
+		k = audioFeats.shape[0] // max(1, videoFeats.shape[0])
+		videoFeats = videoFeats.repeat_interleave(k, dim=0)
+	else:
+		k = videoFeats.shape[0] // max(1, audioFeats.shape[0])
+		audioFeats = audioFeats.repeat_interleave(k, dim=0)
+	commonSize = min(audioFeats.shape[0], videoFeats.shape[0])
+	return torch.cat([videoFeats[: commonSize], audioFeats[: commonSize]], dim=1)
+
+
+def concat(videoFeats, audioFeats):
+	"""Concatenate along the feature axis without resizing; row counts must already match."""
+	return torch.cat([videoFeats, audioFeats], dim=1)
+
+
+FUSIONS = {'0': avg_trim, '1': dup_trim, '2': concat}
+
+if __name__ == "__main__":
+	opt = parser.parse_args()
+	fuse = FUSIONS[opt.type]
+	for i in tqdm(glob('I3D_RGB/*')):
+		# str.replace swaps every 'I3D_RGB' in the path to locate the audio/output files.
+		videoFeats = torch.load(i)
+		audioFeats = torch.load(i.replace('I3D_RGB', 'AudioFeats'))
+		torch.save(fuse(videoFeats, audioFeats), i.replace('I3D_RGB', 'combinedFeats'))