# nerf_eval.py
# Forked from hbb1/2d-gaussian-splatting.
# training scripts for the nerf-synthetic datasets
# this script is adopted from GOF
# https://github.com/autonomousvision/gaussian-opacity-fields/blob/main/scripts/run_nerf_synthetic.py
import os
import GPUtil
from concurrent.futures import ThreadPoolExecutor
import time
import itertools
# Scenes of the NeRF-synthetic (Blender) benchmark to train and evaluate.
scenes = ["ship", "drums", "ficus", "hotdog", "lego", "materials", "mic", "chair"]
# Downsampling factors to sweep; only full resolution (1) is used here.
factors = [1]
# Where trained models / renders / metrics are written, one subfolder per scene.
output_dir = "output/exp_nerf_synthetic"
# Root of the NeRF-synthetic dataset; scenes are subfolders of this path.
dataset_dir = "data/nerf_synthetic"
# When True, commands are only printed, never executed.
dry_run = False
# GPU ids that must never be scheduled, even if GPUtil reports them as free.
excluded_gpus = set([])
# One job per (scene, factor) combination.
jobs = list(itertools.product(scenes, factors))
def train_scene(gpu, scene, factor):
    """Run the train -> render -> metrics pipeline for one scene on one GPU.

    Args:
        gpu: GPU index, used for ``CUDA_VISIBLE_DEVICES`` and to derive a
            distinct network port per concurrent training run.
        scene: Scene name under ``dataset_dir``.
        factor: Downsampling factor (currently unused by the commands).

    Returns:
        True if every stage exited with status 0 (or ``dry_run`` is set);
        False as soon as one stage fails. Later stages are then skipped,
        since rendering/metrics on a failed training run are meaningless.
    """
    commands = [
        # Train with a white background and normal regularization disabled;
        # a per-GPU port avoids clashes between concurrent trainings.
        f"OMP_NUM_THREADS=4 CUDA_VISIBLE_DEVICES={gpu} python train.py -s {dataset_dir}/{scene} -m {output_dir}/{scene} --eval --white_background --lambda_normal 0.0 --port {6209+int(gpu)}",
        # Render only the test split, without mesh extraction.
        f"OMP_NUM_THREADS=4 CUDA_VISIBLE_DEVICES={gpu} python render.py -m {output_dir}/{scene} --skip_train --skip_mesh",
        # Compute image-quality metrics on the rendered test views.
        f"OMP_NUM_THREADS=4 CUDA_VISIBLE_DEVICES={gpu} python metrics.py -m {output_dir}/{scene}",
    ]
    for cmd in commands:
        print(cmd)
        # Original ignored exit codes; abort the pipeline on the first failure.
        if not dry_run and os.system(cmd) != 0:
            return False
    return True
def worker(gpu, scene, factor):
    """Run one (scene, factor) job on *gpu*, returning once it is done."""
    start_msg = f"Starting job on GPU {gpu} with scene {scene}\n"
    print(start_msg)
    train_scene(gpu, scene, factor)
    end_msg = f"Finished job on GPU {gpu} with scene {scene}\n"
    print(end_msg)
def dispatch_jobs(jobs, executor):
    """Greedy scheduler: assign pending (scene, factor) jobs to idle GPUs.

    Polls GPUtil every 5 seconds, launches queued jobs on any GPU that is
    sufficiently idle (and not reserved or in ``excluded_gpus``), and frees
    a GPU once its job's future completes. Returns when all jobs are done.

    Args:
        jobs: Mutable list of (scene, factor) tuples; consumed in place.
        executor: Executor used to run ``worker`` for each job.
    """
    future_to_job = {}
    # GPUs slated for work that may not yet show as busy in GPUtil.
    reserved_gpus = set()

    while jobs or future_to_job:
        # GPUs GPUtil considers free, minus reserved and excluded ones.
        all_available_gpus = set(
            GPUtil.getAvailable(order="first", limit=10, maxMemory=0.5, maxLoad=0.5)
        )
        available_gpus = list(all_available_gpus - reserved_gpus - excluded_gpus)

        # Launch as many queued jobs as there are free GPUs.
        while available_gpus and jobs:
            gpu = available_gpus.pop(0)
            job = jobs.pop(0)
            future = executor.submit(worker, gpu, *job)  # unpack (scene, factor)
            future_to_job[future] = (gpu, job)
            reserved_gpus.add(gpu)  # hold this GPU until its job completes

        # Reap completed futures and release their GPUs.
        done_futures = [future for future in future_to_job if future.done()]
        for future in done_futures:
            gpu, job = future_to_job.pop(future)
            reserved_gpus.discard(gpu)
            # Surface worker exceptions instead of silently dropping them
            # (the original never inspected the future's result).
            exc = future.exception()
            if exc is not None:
                print(f"Job {job} failed on GPU {gpu}: {exc!r}")
            print(f"Job {job} has finished, releasing GPU {gpu}")

        # Small delay so this loop does not spin when no GPU is available.
        time.sleep(5)
    print("All jobs have been processed.")
# Dispatch only when executed as a script, not on import. Threads (not
# processes) suffice because each worker just shells out to subprocesses.
if __name__ == "__main__":
    with ThreadPoolExecutor(max_workers=8) as executor:
        dispatch_jobs(jobs, executor)