Skip to content

Commit 1fb9a3e

Browse files
committed
Fix npz load issue
1 parent b78e2ee commit 1fb9a3e

File tree

18 files changed

+70
-158
lines changed

18 files changed

+70
-158
lines changed

common/load_utils.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,20 @@ def write_json(data_dict: Any, filename: str) -> None:
5050
with open(filename, "w") as outfile:
5151
outfile.write(json_obj)
5252

53+
def load_npz_as_dict(filename: str) -> dict:
    """Eagerly load an ``.npz`` archive into a plain ``dict``.

    Every entry is read while the archive is still open, so the returned
    mapping remains usable after the underlying file has been closed.
    Zero-dimensional object arrays (e.g. a ``dict`` that was passed to
    ``np.savez``) are unwrapped with ``.item()`` so callers get the stored
    Python object directly; every other entry is returned unchanged.

    Args:
        filename: Path to the ``.npz`` file to read.

    Returns:
        A dict mapping each archive key to its loaded value.

    Raises:
        TypeError: If ``filename`` does not load as an ``.npz`` archive
            (for example a single-array ``.npy`` file).
    """
    # NOTE(security): allow_pickle=True unpickles object arrays, which can
    # execute arbitrary code. Only call this on trusted files.
    loaded = np.load(filename, allow_pickle=True)

    # Validate before entering the ``with`` block: a non-archive result (such
    # as a bare ndarray) is not a context manager, so checking inside the
    # block would never be reached and the caller would see an opaque error.
    if not isinstance(loaded, np.lib.npyio.NpzFile):
        raise TypeError(
            f"Expected an .npz archive at {filename!r}, "
            f"got {type(loaded).__name__}"
        )

    def _unwrap(val):
        # A 0-d object array is just a box around a single Python object.
        is_boxed_object = (
            isinstance(val, np.ndarray)
            and val.dtype == object
            and val.shape == ()
        )
        return val.item() if is_boxed_object else val

    with loaded as npz:
        return {key: _unwrap(npz[key]) for key in npz.files}
66+
5367
def get_print_format(value: Any) -> str:
5468
"""Determines the appropriate format string for a given value."""
5569
if isinstance(value, int):

configs/preprocess/process_2d.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ data:
3434
processor2D : ARKitScenes2DProcessor
3535
processor1D : ARKitScenes1DProcessor
3636
skip_frames : 1
37+
3738
MultiScan:
3839
base_dir : /media/sayan/Expansion/data/datasets/MultiScan
3940
process_dir : ${data.process_dir}/MultiScan

configs/train/train_scene_crossover.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ task:
7272
scene_modalities : ['rgb', 'point', 'floorplan', 'referral']
7373
train : [Scannet, Scan3R, MultiScan, ARKitScenes]
7474
val : [Scannet, Scan3R, MultiScan, ARKitScenes]
75-
object_enc_ckpt : /drive/dumps/multimodal-spaces/runs/release_runs/instance_crossover_scannet+scan3r+multiscan+arkitscenes.pth
75+
object_enc_ckpt : /drive/dumps/multimodal-spaces/runs/curr_runs/instance_crossover_scannet+scan3r+multiscan+arkitscenes.pth
7676

7777
trainer: UnifiedTrainer
7878

data/datasets/scanbase.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,11 @@
1010
from omegaconf import DictConfig
1111
from typing import List, Dict, Any
1212

13+
from common.load_utils import load_npz_as_dict
1314
from ..transforms import get_transform
1415
from ..data_utils import pad_tensors
1516

17+
1618
class ScanObjectBase(Dataset):
1719
"""Base Dataset class for instance level training"""
1820
def __init__(self, data_config: DictConfig, split: str) -> None:
@@ -131,18 +133,13 @@ def __getitem__(self, index: int) -> Dict[str, Any]:
131133

132134
scan_process_dir = osp.join(self.process_dir, 'scans', scan_id)
133135

134-
# scan_objects_data = torch.load(osp.join(scan_process_dir, 'objectsDataMultimodal.pt'))
135-
scan_objects_data = np.load(osp.join(scan_process_dir, 'objectsDataMultimodal.npz'), allow_pickle=True)
136-
137-
# scandata_1d = torch.load(osp.join(scan_process_dir, 'data1D.pt'))
138-
scandata_1d = np.load(osp.join(scan_process_dir, 'data1D.npz'), allow_pickle=True)
139-
# scandata_2d = torch.load(osp.join(scan_process_dir, 'data2D.pt'))
140-
scandata_2d = np.load(osp.join(scan_process_dir, 'data2D.npz'), allow_pickle=True)
141-
# scandata_3d = torch.load(osp.join(scan_process_dir, 'data3D.pt'))
142-
scandata_3d = np.load(osp.join(scan_process_dir, 'data3D.npz'), allow_pickle=True)
136+
scan_objects_data = load_npz_as_dict(osp.join(scan_process_dir, 'objectsDataMultimodal.npz'))
137+
scandata_1d = load_npz_as_dict(osp.join(scan_process_dir, 'data1D.npz'))
138+
scandata_2d = load_npz_as_dict(osp.join(scan_process_dir, 'data2D.npz'))
139+
scandata_3d = load_npz_as_dict(osp.join(scan_process_dir, 'data3D.npz'))
143140

144141
# Point Cloud Data -- Scene
145-
points, feats, scene_label = scandata_3d['scene'].item()['pcl_coords'], scandata_3d['scene'].item()['pcl_feats'], scandata_3d['scene'].item()['scene_label']
142+
points, feats, scene_label = scandata_3d['scene']['pcl_coords'], scandata_3d['scene']['pcl_feats'], scandata_3d['scene']['scene_label']
146143
feats /= 255.
147144
feats -= 0.5
148145

@@ -189,24 +186,25 @@ def __getitem__(self, index: int) -> Dict[str, Any]:
189186

190187
scene_dict['scene_masks'] = {}
191188

192-
rgb_embedding = torch.from_numpy(scandata_2d['scene'].item()['scene_embeddings'])
189+
rgb_embedding = torch.from_numpy(scandata_2d['scene']['scene_embeddings'])
193190
rgb_embedding = torch.concatenate([rgb_embedding[:, 0, :], rgb_embedding[:, 1:, :].mean(dim=1)], dim=1)
191+
rgb_embedding = rgb_embedding[list(range(0, rgb_embedding.shape[0], 2)), :]
194192
scene_dict['rgb_embedding'] = rgb_embedding
195193

196194
scene_dict['scene_masks']['rgb'] = torch.Tensor([1.0])
197195
scene_dict['scene_masks']['point'] = torch.Tensor([1.0])
198196
scene_dict['scene_masks']['object'] = torch.Tensor([1.0])
199197

200198
referral_mask = torch.Tensor([0.0])
201-
referral_embedding = scandata_1d['scene'].item()['referral_embedding']
199+
referral_embedding = scandata_1d['scene']['referral_embedding']
202200

203201
if referral_embedding is not None:
204202
referral_embedding = torch.from_numpy(referral_embedding[0]['feat']).reshape(-1,)
205203
referral_mask = torch.Tensor([1.0])
206204
else:
207205
referral_embedding = torch.zeros((scene_dict['rgb_embedding'].shape[-1] // 4, ))
208206

209-
floorplan_embedding = scandata_2d['scene'].item()['floorplan']['embedding']
207+
floorplan_embedding = scandata_2d['scene']['floorplan']['embedding']
210208
floorplan_mask = torch.Tensor([0.0])
211209
if floorplan_embedding is not None:
212210
floorplan_embedding = torch.from_numpy(floorplan_embedding[0, 0]).reshape(-1, )

preprocess/feat1D/arkit.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ def compute1DFeaturesEachScan(self, scan_id):
5959
if osp.exists(pt_1d_path):
6060
os.remove(pt_1d_path)
6161

62-
npz_data = np.load(osp.join(scene_out_dir, 'object_id_to_label_id_map.npz'),allow_pickle=True)
63-
objectID_to_labelID_map = npz_data['obj_id_to_label_id_map'].item()
62+
npz_data = load_utils.load_npz_as_dict(osp.join(scene_out_dir, 'object_id_to_label_id_map.npz'))
63+
objectID_to_labelID_map = npz_data['obj_id_to_label_id_map']
6464

6565
scan_objects = self.load_objects_for_scan(scan_id)
6666

preprocess/feat1D/multiscan.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,8 @@ def compute1DFeaturesEachScan(self, scan_id):
5555
if osp.exists(pt_1d_path):
5656
os.remove(pt_1d_path)
5757

58-
# else:
59-
npz_data = np.load(osp.join(scene_out_dir, 'object_id_to_label_id_map.npz'),allow_pickle=True)
60-
objectID_to_labelID_map = npz_data['obj_id_to_label_id_map'].item()
58+
npz_data = load_utils.load_npz_as_dict(osp.join(scene_out_dir, 'object_id_to_label_id_map.npz'))
59+
objectID_to_labelID_map = npz_data['obj_id_to_label_id_map']
6160

6261
scan_objects = self.load_objects_for_scan(scan_id)
6362

preprocess/feat1D/scan3r.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ def compute1DFeaturesEachScan(self, scan_id: str) -> None:
3939
if osp.exists(pt_1d_path):
4040
os.remove(pt_1d_path)
4141

42-
npz_data = np.load(osp.join(scene_out_dir, 'object_id_to_label_id_map.npz'),allow_pickle=True)
43-
objectID_to_labelID_map = npz_data['obj_id_to_label_id_map'].item()
42+
npz_data = load_utils.load_npz_as_dict(osp.join(scene_out_dir, 'object_id_to_label_id_map.npz'))
43+
objectID_to_labelID_map = npz_data['obj_id_to_label_id_map']
4444
scan_objects = [obj_data for obj_data in self.objects if obj_data['scan'] == scan_id][0]['objects']
4545

4646
object_referral_embeddings, scene_referral_embeddings = {}, None

preprocess/feat1D/scannet.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,8 @@ def compute1DFeaturesEachScan(self, scan_id: str) -> None:
4141
if osp.exists(pt_1d_path):
4242
os.remove(pt_1d_path)
4343

44-
# objectID_to_labelID_map = torch.load(osp.join(scene_out_dir, 'object_id_to_label_id_map.pt'))['obj_id_to_label_id_map']
45-
npz_data = np.load(osp.join(scene_out_dir, 'object_id_to_label_id_map.npz'),allow_pickle=True)
46-
objectID_to_labelID_map = npz_data['obj_id_to_label_id_map'].item()
44+
npz_data = load_utils.load_npz_as_dict(osp.join(scene_out_dir, 'object_id_to_label_id_map.npz'))
45+
objectID_to_labelID_map = npz_data['obj_id_to_label_id_map']
4746
objects = [objects['objects'] for objects in self.objects if objects['scan'] == scan_id]
4847

4948
object_referral_embeddings, scene_referral_embeddings = {}, None

preprocess/feat2D/arkit.py

Lines changed: 4 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -45,32 +45,22 @@ def __init__(self, config_data: DictConfig, config_2D: DictConfig, split: str) -
4545
for scan_id in self.scan_ids:
4646
pose_data = arkit.load_poses(osp.join(self.data_dir, 'scans', scan_id),scan_id, skip=self.frame_skip)
4747
self.frame_pose_data[scan_id] = pose_data
48-
4948

5049
def compute2DFeatures(self) -> None:
5150
for scan_id in tqdm(self.scan_ids):
5251
self.compute2DImagesAndSeg(scan_id)
53-
self.compute2DFeaturesEachScan(scan_id)
52+
self.compute2DFeaturesEachScan(scan_id)
5453

5554
def compute2DImagesAndSeg(self, scan_id: str) -> None:
5655
obj_id_imgs = {}
57-
scene_folder = osp.join(self.data_dir, 'scans', scan_id)
5856

5957
scene_out_dir = osp.join(self.out_dir, scan_id)
6058
load_utils.ensure_dir(scene_out_dir)
6159

6260
objects_path = osp.join(self.data_dir, 'scans', scan_id, f"{scan_id}_3dod_annotation.json")
6361
if not osp.exists(objects_path):
6462
raise FileNotFoundError(f"Annotations file not found for scan ID: {scan_id}")
65-
66-
gt_pt_path = osp.join(scene_folder, 'gt-projection-seg.pt')
67-
if osp.exists(gt_pt_path):
68-
os.remove(gt_pt_path)
69-
70-
gt_pt_path = osp.join(scene_out_dir, 'gt-projection-seg.pt')
71-
if osp.exists(gt_pt_path):
72-
os.remove(gt_pt_path)
73-
63+
7464
annotations = load_utils.load_json(objects_path)
7565
ply_data = arkit.load_ply_data(osp.join(self.data_dir,'scans'), scan_id, annotations)
7666
instance_ids = ply_data['objectId']
@@ -110,11 +100,8 @@ def compute2DFeaturesEachScan(self, scan_id: str) -> None:
110100

111101
scene_out_dir = osp.join(self.out_dir, scan_id)
112102
load_utils.ensure_dir(scene_out_dir)
113-
pt_2d_path = osp.join(scene_out_dir, 'data2D.pt')
114-
if osp.exists(pt_2d_path):
115-
os.remove(pt_2d_path)
116-
117-
obj_id_to_label_id_map = np.load(osp.join(scene_out_dir, 'object_id_to_label_id_map.npz'),allow_pickle=True)['obj_id_to_label_id_map'].item()
103+
104+
obj_id_to_label_id_map = load_utils.load_npz_as_dict(osp.join(scene_out_dir, 'object_id_to_label_id_map.npz'))['obj_id_to_label_id_map']
118105

119106
# Multi-view Image -- Object (Embeddings)
120107
object_image_embeddings, object_image_votes_topK, frame_idxs = self.computeImageFeaturesAllObjectsEachScan(scene_folder, scene_out_dir, obj_id_to_label_id_map)
@@ -147,36 +134,6 @@ def compute2DFeaturesEachScan(self, scan_id: str) -> None:
147134

148135
np.savez_compressed(osp.join(scene_out_dir, 'data2D.npz'), **data2D)
149136

150-
def computeAllImageFeaturesEachScan(self, scan_id: str) -> None:
151-
scene_folder = osp.join(self.data_dir, 'scans', scan_id)
152-
color_path = osp.join(scene_folder,f'{scan_id}_frames', 'lowres_wide')
153-
154-
scene_out_dir = osp.join(self.out_dir, scan_id)
155-
load_utils.ensure_dir(scene_out_dir)
156-
157-
frame_idxs = list(self.frame_pose_data[scan_id].keys())
158-
159-
# Extract Scene Image Features
160-
scene_images_pt = []
161-
scene_image_embeddings = []
162-
# sky_direction=self.metadata[self.metadata['video_id']==int(scan_id)]['sky_direction'].values[0]
163-
164-
for frame_index in frame_idxs:
165-
image = Image.open(osp.join(color_path, f'{scan_id}_{frame_index}.png'))
166-
167-
image = image.resize((self.model_image_size[1], self.model_image_size[0]), Image.BICUBIC)
168-
image_pt = self.model.base_tf(image)
169-
170-
scene_image_embeddings.append(self.extractFeatures([image_pt], return_only_cls_mean= False))
171-
scene_images_pt.append(image_pt)
172-
173-
scene_image_embeddings = np.concatenate(scene_image_embeddings)
174-
data2D = {}
175-
data2D['scene'] = {'scene_embeddings': scene_image_embeddings, 'images' : scene_images_pt,
176-
'frame_idxs' : frame_idxs}
177-
# torch.save(data2D, osp.join(scene_out_dir, 'data2D_all_images.pt'))
178-
np.savez_compressed(osp.join(scene_out_dir, 'data2D_all_images.npz'), **data2D)
179-
180137
def computeSelectedImageFeaturesEachScan(self, scan_id: str, color_path: str, frame_idxs: List[int]) -> Tuple[np.ndarray, List[torch.tensor], np.ndarray, List[int]]:
181138
# Sample Camera Indexes Based on Rotation Matrix From Grid
182139
pose_data = []
@@ -204,9 +161,7 @@ def computeSelectedImageFeaturesEachScan(self, scan_id: str, color_path: str, fr
204161
scene_image_embeddings = self.extractFeatures(scene_images_pt, return_only_cls_mean= False)
205162

206163
return pose_data, scene_images_pt, scene_image_embeddings, sampled_frame_idxs
207-
# return pose_data, None, None, sampled_frame_idxs
208164

209-
210165
def computeImageFeaturesAllObjectsEachScan(self, scene_folder: str, scene_out_dir: str, obj_id_to_label_id_map: dict) -> Tuple[Dict[int, Dict[int, np.ndarray]], Dict[int, List[int]], List[str]]:
211166
object_anno_2D = np.load(osp.join(scene_out_dir, 'gt-projection-seg.npz'),allow_pickle=True)
212167
object_image_votes = {}

preprocess/feat2D/multiscan.py

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,6 @@ def __init__(self, config_data, config_2D, split) -> None:
4545
while(len(frame_idxs) > 500):
4646
self.frame_skip += 2
4747
frame_idxs = multiscan.load_frame_idxs(scene_folder, skip=self.frame_skip)
48-
# if len(frame_idxs) > 500:
49-
# frame_idxs = multiscan.load_frame_idxs(scene_folder, skip=2)
50-
# if len(frame_idxs) > 500:
51-
# frame_idxs = multiscan.load_frame_idxs(scene_folder, skip=5)
52-
# if len(frame_idxs) > 500:
53-
# frame_idxs = multiscan.load_frame_idxs(scene_folder, skip=10)
54-
# if len(frame_idxs) > 500:
55-
# frame_idxs = multiscan.load_frame_idxs(scene_folder, skip=15)
56-
# if len(frame_idxs) > 500:
57-
# frame_idxs = multiscan.load_frame_idxs(scene_folder, skip=20)
5848

5949
pose_data = multiscan.load_all_poses(scene_folder, frame_idxs)
6050
self.frame_pose_data[scan_id] = pose_data
@@ -72,15 +62,6 @@ def compute2DImagesAndSeg(self, scan_id):
7262
scene_out_dir = osp.join(self.out_dir, scan_id)
7363
load_utils.ensure_dir(scene_out_dir)
7464

75-
gt_pt_path = osp.join(scene_folder, 'gt-projection-seg.pt')
76-
if osp.exists(gt_pt_path):
77-
os.remove(gt_pt_path)
78-
79-
gt_pt_path = osp.join(scene_out_dir, 'gt-projection-seg.pt')
80-
if osp.exists(gt_pt_path):
81-
os.remove(gt_pt_path)
82-
83-
# else:
8465
mesh_file = osp.join(scene_folder, '{}.ply'.format(scan_id))
8566
ply_data = multiscan.load_ply_data(osp.join(self.data_dir, 'scenes'), scan_id)
8667
instance_ids = ply_data['objectId']
@@ -119,14 +100,7 @@ def compute2DFeaturesEachScan(self, scan_id):
119100
scene_out_dir = osp.join(self.out_dir, scan_id)
120101
load_utils.ensure_dir(scene_out_dir)
121102

122-
pt_2d_path = osp.join(scene_out_dir, 'data2D.pt')
123-
if osp.exists(pt_2d_path):
124-
os.remove(pt_2d_path)
125-
126-
127-
128-
# else:
129-
obj_id_to_label_id_map = np.load(osp.join(scene_out_dir, 'object_id_to_label_id_map.npz'),allow_pickle=True)['obj_id_to_label_id_map'].item()
103+
obj_id_to_label_id_map = load_utils.load_npz_as_dict(osp.join(scene_out_dir, 'object_id_to_label_id_map.npz'))['obj_id_to_label_id_map']
130104

131105
# Multi-view Image -- Object (Embeddings)
132106
object_image_embeddings, object_image_votes_topK, frame_idxs = self.computeImageFeaturesAllObjectsEachScan(scene_folder, scene_out_dir, obj_id_to_label_id_map)

preprocess/feat2D/scan3r.py

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,6 @@ def __init__(self, config_data: DictConfig, config_2D: DictConfig, split: str) -
5050
self.frame_pose_data[scan_id] = pose_data
5151

5252
def compute2DFeatures(self) -> None:
53-
if self.split == 'train':
54-
self.scan_ids = self.scan_ids[13+102+295:]
55-
else:
56-
self.scan_ids = self.scan_ids[:]
57-
5853
for scan_id in tqdm(self.scan_ids):
5954
self.compute2DImagesAndSeg(scan_id)
6055
self.compute2DFeaturesEachScan(scan_id)
@@ -66,15 +61,7 @@ def compute2DImagesAndSeg(self, scan_id: str) -> None:
6661
scene_out_dir = osp.join(self.out_dir, scan_id)
6762
load_utils.ensure_dir(scene_out_dir)
6863

69-
obj_id_imgs = {}
70-
gt_pt_path = osp.join(scene_out_dir, 'gt-projection-seg.pt')
71-
if osp.exists(gt_pt_path):
72-
os.remove(gt_pt_path)
73-
74-
gt_pt_path = osp.join(scene_folder, 'gt-projection-seg.pt')
75-
if osp.exists(gt_pt_path):
76-
os.remove(gt_pt_path)
77-
64+
obj_id_imgs = {}
7865
ply_data = scan3r.load_ply_data(self.data_dir, scan_id, self.label_filename)
7966
instance_ids = ply_data['objectId']
8067

@@ -113,11 +100,7 @@ def compute2DFeaturesEachScan(self, scan_id: str) -> None:
113100
scene_out_dir = osp.join(self.out_dir, scan_id)
114101
load_utils.ensure_dir(scene_out_dir)
115102

116-
pt_2d_path = osp.join(scene_out_dir, 'data2D.pt')
117-
if osp.exists(pt_2d_path):
118-
os.remove(pt_2d_path)
119-
120-
obj_id_to_label_id_map = np.load(osp.join(scene_out_dir, 'object_id_to_label_id_map.npz'),allow_pickle=True)['obj_id_to_label_id_map'].item()
103+
obj_id_to_label_id_map = load_utils.load_npz_as_dict(osp.join(scene_out_dir, 'object_id_to_label_id_map.npz'))['obj_id_to_label_id_map']
121104

122105
# Multi-view Image -- Object (Embeddings)
123106
object_image_embeddings, object_image_votes_topK, frame_idxs = self.computeImageFeaturesAllObjectsEachScan(scene_folder, scene_out_dir, obj_id_to_label_id_map)

preprocess/feat2D/scannet.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,7 @@ def compute2DFeaturesEachScan(self, scan_id: str) -> None:
8787

8888
scene_out_dir = osp.join(self.out_dir, scan_id)
8989
load_utils.ensure_dir(scene_out_dir)
90-
pt_2d_path = osp.join(scene_out_dir, 'data2D.pt')
91-
if osp.exists(pt_2d_path):
92-
os.remove(pt_2d_path)
90+
9391

9492
# Floor-plan rendering
9593
render_img = self.renderShapeAndFloorplan(scene_folder, scene_out_dir, scan_id)

0 commit comments

Comments
 (0)