Commit

Add files via upload
add Encoding fusion files
jazib-sudo authored Feb 1, 2022
1 parent dae5803 commit 49a542e
Showing 2 changed files with 109 additions and 0 deletions.
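Both files implement the same three audio-visual fusion strategies, applied to different feature stores (HDF5 datasets for G-TAD, per-video tensors for MUSES): AvgTrim pools the audio features down toward the video length, DupTrim duplicates the shorter stream to reach the longer one, and Concat simply joins the two along the feature dimension. Below is a toy sketch of what the first two strategies do to the shapes, using random tensors and illustrative dimensions that are not taken from the commit (the pooling stride is also simplified to k here, whereas the scripts step every 2 rows):

import torch

video = torch.randn(100, 2048)   # illustrative: 100 snippets of 2048-d RGB features
audio = torch.randn(400, 128)    # illustrative: 400 frames of 128-d audio features
k = audio.shape[0] // video.shape[0]  # 4 audio frames per video snippet

# AvgTrim idea: pool audio windows of size k, trim, then concatenate.
pooled = torch.stack([audio[j:j + k].mean(0) for j in range(0, audio.shape[0], k)])
n = min(pooled.shape[0], video.shape[0])
avgtrim = torch.cat([video[:n], pooled[:n]], dim=1)   # shape (100, 2176)

# DupTrim idea: repeat each video feature k times, trim, then concatenate.
dup = video.repeat_interleave(k, dim=0)
n = min(dup.shape[0], audio.shape[0])
duptrim = torch.cat([dup[:n], audio[:n]], dim=1)      # shape (400, 2176)

# Concat (type 2) would simply cat along dim=1 and requires equal lengths already.
print(avgtrim.shape, duptrim.shape)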
56 changes: 56 additions & 0 deletions gtad_fusion.py
@@ -0,0 +1,56 @@
import h5py
import torch
import argparse
import numpy as np
from tqdm import tqdm

parser = argparse.ArgumentParser()
parser.add_argument('--type', type=str, default='0', help='fusion type') # 0: AvgTrim 1: DupTrim 2: Concat
opt = parser.parse_args()

assert(int(opt.type) >= 0 and int(opt.type) <= 2)

with h5py.File("combined_val.h5", "w") as writeHere:
    with h5py.File('rgb_val.h5', "r") as rgbF:
        with h5py.File('audio_val.h5', "r") as AudioF:
            for i in tqdm(rgbF.keys()):
                videoFeats = torch.tensor(np.array(rgbF[i]))
                audioFeats = torch.tensor(np.array(AudioF[i]))

                # AvgTrim: average the 128-d audio features over windows of size k
                # (one window every 2 rows), trim both streams to a common length,
                # then concatenate along the feature dimension.
                if opt.type == "0":
                    k = audioFeats.shape[0] // videoFeats.shape[0]
                    combinedFeats = torch.zeros(1, 128)
                    for j in range(0, audioFeats.shape[0], 2):
                        combinedFeats = torch.cat([combinedFeats, torch.mean(audioFeats[j:j+k, :], 0).unsqueeze(0)], dim=0)
                    combinedFeats = combinedFeats[1:, :]  # drop the zero row used for initialisation
                    commonSize = min(combinedFeats.shape[0], videoFeats.shape[0])
                    videoFeats = videoFeats[:commonSize, :]
                    combinedFeats = combinedFeats[:commonSize, :]
                    combinedFeats = torch.cat([videoFeats, combinedFeats], dim=1)
                    writeHere.create_dataset(i, data=combinedFeats.detach().cpu().numpy())
                    continue

                # DupTrim: repeat each row of the shorter stream k times so the
                # temporal lengths roughly match, then trim and concatenate.
                if opt.type == "1":
                    if audioFeats.shape[0] > videoFeats.shape[0]:
                        k = audioFeats.shape[0] // videoFeats.shape[0]
                        videoFeats = videoFeats.unsqueeze(1).repeat(1, k, 1)
                        videoFeats = videoFeats.reshape(videoFeats.shape[0] * videoFeats.shape[1], videoFeats.shape[2])
                    else:
                        k = videoFeats.shape[0] // audioFeats.shape[0]
                        audioFeats = audioFeats.unsqueeze(1).repeat(1, k, 1)
                        audioFeats = audioFeats.reshape(audioFeats.shape[0] * audioFeats.shape[1], audioFeats.shape[2])

                    commonSize = min(audioFeats.shape[0], videoFeats.shape[0])
                    videoFeats = videoFeats[:commonSize, :]
                    audioFeats = audioFeats[:commonSize, :]
                    combinedFeats = torch.cat([videoFeats, audioFeats], dim=1)
                    writeHere.create_dataset(i, data=combinedFeats.detach().cpu().numpy())
                    continue

                # Concat: concatenate along the feature dimension; assumes both
                # streams already have the same temporal length.
                if opt.type == "2":
                    combinedFeats = torch.cat([videoFeats, audioFeats], dim=1)
                    writeHere.create_dataset(i, data=combinedFeats.detach().cpu().numpy())
                    continue
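
A minimal sketch of how the script's output might be sanity-checked after running it (e.g. python gtad_fusion.py --type 0), assuming rgb_val.h5 and audio_val.h5 were present and combined_val.h5 was written:

import h5py

# Inspect a few fused feature matrices; for AvgTrim/DupTrim the last dimension
# should be the RGB feature size plus 128.
with h5py.File("combined_val.h5", "r") as f:
    for name in list(f.keys())[:3]:
        print(name, f[name].shape)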
53 changes: 53 additions & 0 deletions muses_fusion.py
@@ -0,0 +1,53 @@
import torch
import argparse
from glob import glob
from tqdm import tqdm

parser = argparse.ArgumentParser()
parser.add_argument('--type', type=str, default='0', help='fusion type') # 0: AvgTrim 1: DupTrim 2: Concat
opt = parser.parse_args()

assert(int(opt.type) >= 0 and int(opt.type) <= 2)

files = glob('I3D_RGB/*')
for i in tqdm(files):
    # Per-video RGB (I3D) and audio feature tensors saved with torch.save.
    videoFeats = torch.load(i)
    audioFeats = torch.load(i.replace('I3D_RGB', 'AudioFeats'))

    # AvgTrim: average the 128-d audio features over windows of size k
    # (one window every 2 rows), trim both streams to a common length,
    # then concatenate along the feature dimension.
    if opt.type == "0":
        k = audioFeats.shape[0] // videoFeats.shape[0]
        combinedFeats = torch.zeros(1, 128)
        for j in range(0, audioFeats.shape[0], 2):
            combinedFeats = torch.cat([combinedFeats, torch.mean(audioFeats[j:j+k, :], 0).unsqueeze(0)], dim=0)
        combinedFeats = combinedFeats[1:, :]  # drop the zero row used for initialisation
        commonSize = min(combinedFeats.shape[0], videoFeats.shape[0])
        videoFeats = videoFeats[:commonSize, :]
        combinedFeats = combinedFeats[:commonSize, :]
        combinedFeats = torch.cat([videoFeats, combinedFeats], dim=1)
        torch.save(combinedFeats, i.replace('I3D_RGB', 'combinedFeats'))
        continue

    # DupTrim: repeat each row of the shorter stream k times so the
    # temporal lengths roughly match, then trim and concatenate.
    if opt.type == "1":
        if audioFeats.shape[0] > videoFeats.shape[0]:
            k = audioFeats.shape[0] // videoFeats.shape[0]
            videoFeats = videoFeats.unsqueeze(1).repeat(1, k, 1)
            videoFeats = videoFeats.reshape(videoFeats.shape[0] * videoFeats.shape[1], videoFeats.shape[2])
        else:
            k = videoFeats.shape[0] // audioFeats.shape[0]
            audioFeats = audioFeats.unsqueeze(1).repeat(1, k, 1)
            audioFeats = audioFeats.reshape(audioFeats.shape[0] * audioFeats.shape[1], audioFeats.shape[2])

        commonSize = min(audioFeats.shape[0], videoFeats.shape[0])
        videoFeats = videoFeats[:commonSize, :]
        audioFeats = audioFeats[:commonSize, :]
        combinedFeats = torch.cat([videoFeats, audioFeats], dim=1)
        torch.save(combinedFeats, i.replace('I3D_RGB', 'combinedFeats'))
        continue

    # Concat: concatenate along the feature dimension; assumes both streams
    # already have the same temporal length.
    if opt.type == "2":
        combinedFeats = torch.cat([videoFeats, audioFeats], dim=1)
        torch.save(combinedFeats, i.replace('I3D_RGB', 'combinedFeats'))
        continue
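
One practical note on muses_fusion.py: torch.save does not create missing directories, so the combinedFeats/ folder that mirrors I3D_RGB/ has to exist before the script is run. A small preparation sketch, assuming the directory layout used above:

import os
from glob import glob

os.makedirs('combinedFeats', exist_ok=True)  # target directory for the fused tensors

# Optional: confirm every RGB feature file has a matching audio file.
for p in glob('I3D_RGB/*'):
    if not os.path.exists(p.replace('I3D_RGB', 'AudioFeats')):
        print('missing audio features for', p)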
