Add examples, more options and instructions.
eliphatfs committed Dec 15, 2023
1 parent 8fb0c94 commit be03652
Showing 5 changed files with 48 additions and 14 deletions.
30 changes: 30 additions & 0 deletions README.md
@@ -66,3 +66,33 @@ cd ../../..
```

## Running

Execute `zerorf.py` to run ZeroRF.

**Zero123++ Image**

ZeroRF can perform reconstruction on generated multi-view images, enabling 3D content generation.
You need to prepare a segmented RGBA image in Zero123++ format (see https://github.com/SUDO-AI-3D/zero123plus).
An example can be found at `examples/ice.png`.

```bash
python zerorf.py --load-image=examples/ice.png
```

The default setup requires 10GB VRAM to operate.
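Zero123++ tiles its generated views into a single image (in the reference pipeline, six 320×320 views in a 3-row × 2-column grid, 640×960 overall — treat these numbers as assumptions, since they depend on the model version). A minimal sketch of computing the crop box for each view, e.g. for inspecting them with `PIL.Image.crop`; `view_boxes` is a hypothetical helper, not part of this repository:

```python
def view_boxes(width, height, rows=3, cols=2):
    """Return (left, upper, right, lower) crop boxes for a rows x cols grid
    of equally sized views tiled in one image (Zero123++-style layout)."""
    tile_w, tile_h = width // cols, height // rows
    return [
        (c * tile_w, r * tile_h, (c + 1) * tile_w, (r + 1) * tile_h)
        for r in range(rows)
        for c in range(cols)
    ]

# Assuming a 640x960 RGBA image holding six 320x320 views:
boxes = view_boxes(640, 960)
print(boxes[0])   # (0, 0, 320, 320)
print(boxes[-1])  # (320, 640, 640, 960)
```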

**NeRF-Synthetic**

To run general reconstruction, you can prepare the dataset in NeRF-Synthetic format.
The NeRF-Synthetic dataset itself can be obtained [here](https://drive.google.com/drive/folders/1JDdLGDruGNXWnM1eqY1FNL9PlStjaKWi).

```bash
python zerorf.py --rep=tensorf --data-dir=path/to/nerf_synthetic --obj=hotdog --n-views=6
```

The default setup requires about 16GB of VRAM, depending on the object.
If this does not fit your GPU, lower the `--n-rays-up` parameter (convergence may then take more steps and more time).
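The NeRF-Synthetic format stores camera poses in `transforms_train.json` (with `_val`/`_test` variants): a shared `camera_angle_x` (horizontal field of view in radians) plus a 4×4 `transform_matrix` per frame. A sketch of recovering the pinhole focal length from it — the inline dictionary stands in for a parsed JSON file:

```python
import json
import math

# Stand-in for json.load(open("nerf_synthetic/hotdog/transforms_train.json")):
transforms = {
    "camera_angle_x": 0.6911112070083618,  # horizontal FOV in radians
    "frames": [
        {"file_path": "./train/r_0",
         "transform_matrix": [[1, 0, 0, 0],
                              [0, 1, 0, 0],
                              [0, 0, 1, 4],
                              [0, 0, 0, 1]]},  # camera-to-world pose
    ],
}

width = 800  # NeRF-Synthetic renders are 800x800
# Standard pinhole relation: focal = (W / 2) / tan(FOV_x / 2)
focal = 0.5 * width / math.tan(0.5 * transforms["camera_angle_x"])
print(round(focal, 1))  # ~1111.1 for the Blender scenes
```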

**Configuration**

You can find more configurations in `opt.py`.
Binary file added examples/ice.png
15 changes: 9 additions & 6 deletions opt.py
```diff
@@ -20,15 +20,18 @@ def config_parser(cmd=None):

     # model
     parser.add_argument("--model-res", type=int,
-                        default=20, help='model resolution')
+                        default=20, help='noise resolution (should be about 1/40 the provided image resolution), ignored when load-image is set')
     parser.add_argument("--model-ch", type=int,
-                        default=8, help='model channel')
-    parser.add_argument("--n-rays", type=int,
-                        default=2**12, help='number of rays per batch')
-    parser.add_argument("--learn-bg", type=bool,
-                        default=False, help='if learn background')
+                        default=8, help='noise channel')
+    parser.add_argument("--n-rays-init", type=int,
+                        default=2**12, help='number of rays per batch initially')
+    parser.add_argument("--n-rays-up", type=int,
+                        default=2**16, help='number of rays per batch after 100 iterations')
+    parser.add_argument("--learn-bg", action='store_true', help='if learn background')
     parser.add_argument("--bg-color", type=float,
                         default=1.0, help='background color')
+    parser.add_argument("--rep", type=str, choices=['dif', 'tensorf'],
+                        default="dif", help="representation to use")

     # training
     parser.add_argument("--net-lr", type=float,
```
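The switch from `type=bool` to `action='store_true'` for `--learn-bg` fixes a classic argparse pitfall: `type=bool` applies Python's `bool()` to the raw argument string, and any non-empty string (including `"False"`) is truthy, so the flag could never actually be turned off from the command line. A minimal demonstration:

```python
import argparse

# The old, buggy pattern: bool("False") is True.
buggy = argparse.ArgumentParser()
buggy.add_argument("--learn-bg", type=bool, default=False)
buggy_off = buggy.parse_args(["--learn-bg", "False"]).learn_bg
print(buggy_off)  # True -- the flag cannot be disabled this way

# The fixed pattern: present means True, absent means False.
fixed = argparse.ArgumentParser()
fixed.add_argument("--learn-bg", action="store_true")
print(fixed.parse_args([]).learn_bg)              # False when omitted
print(fixed.parse_args(["--learn-bg"]).learn_bg)  # True when given
```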
1 change: 1 addition & 0 deletions requirements.txt
```diff
@@ -18,3 +18,4 @@ transformers
 diffusers[torch]>=0.17.1, <0.19.0
 triton
 torch_redstone
+configargparse
```
16 changes: 8 additions & 8 deletions zerorf.py
```diff
@@ -170,7 +170,7 @@ def kmeans_downsample(points, n_points_to_sample):
         code_activation=dict(type='IdentityCode'),
         grid_size=64,
         patch_size=32,
-        decoder=decoder_2,
+        decoder=decoder_2 if args.rep == 'dif' else decoder_1,
         decoder_use_ema=False,
         bg_color=1.0,
         pixel_loss=dict(
@@ -185,8 +185,8 @@ def kmeans_downsample(points, n_points_to_sample):
         dt_gamma_scale=0.5,
         density_thresh=0.05,
         extra_scene_step=0,
-        n_inverse_rays=args.n_rays,
-        n_decoder_rays=args.n_rays,
+        n_inverse_rays=args.n_rays_init,
+        n_decoder_rays=args.n_rays_init,
         loss_coef=0.1 / (pic_h * pic_w),
         optimizer=dict(type='Adam', lr=0, weight_decay=0.),
         lr_scheduler=dict(type='ExponentialLR', gamma=0.99),
@@ -200,7 +200,7 @@ def kmeans_downsample(points, n_points_to_sample):
         density_thresh=0.01,
         max_render_rays=pic_h * pic_w,
         dt_gamma_scale=0.5,
-        n_inverse_rays=args.n_rays,
+        n_inverse_rays=args.n_rays_init,
         loss_coef=0.1 / (pic_h * pic_w),
         n_inverse_steps=400,
         optimizer=dict(type='Adam', lr=0.0, weight_decay=0.),
@@ -231,11 +231,11 @@ def kmeans_downsample(points, n_points_to_sample):
         prog.set_postfix(**lv)
         wandb.log(dict(train=lv))
         if j == 50:
-            nerf.train_cfg['n_inverse_rays'] = 2 ** 14
-            nerf.train_cfg['n_decoder_rays'] = 2 ** 14
+            nerf.train_cfg['n_inverse_rays'] = round((args.n_rays_init * args.n_rays_up) ** 0.5)
+            nerf.train_cfg['n_decoder_rays'] = round((args.n_rays_init * args.n_rays_up) ** 0.5)
         if j == 100:
-            nerf.train_cfg['n_inverse_rays'] = 2 ** 16 if args.load_image else 2 ** 17
-            nerf.train_cfg['n_decoder_rays'] = 2 ** 16 if args.load_image else 2 ** 17
+            nerf.train_cfg['n_inverse_rays'] = args.n_rays_up
+            nerf.train_cfg['n_decoder_rays'] = args.n_rays_up
         if j % args.val_iter == args.val_iter - 1:
             cam = OrbitCamera('final', pic_w, pic_h, 3.2, 48)
             cache = nerf.cache[0]
```
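This change replaces the old hardcoded batch sizes with values derived from `--n-rays-init` and `--n-rays-up`: the geometric mean of the two at iteration 50, then the full target at iteration 100. With the defaults (2^12 and 2^16) the intermediate step reproduces the previous hardcoded 2^14. A sketch of the resulting schedule (`ray_batch_size` is an illustrative helper, not a function in the repository):

```python
def ray_batch_size(j, n_rays_init=2**12, n_rays_up=2**16):
    """Ray-batch schedule matching the training-loop updates above."""
    if j < 50:
        return n_rays_init
    if j < 100:
        # Geometric mean of the initial and final batch sizes.
        return round((n_rays_init * n_rays_up) ** 0.5)
    return n_rays_up

print(ray_batch_size(0))    # 4096
print(ray_batch_size(50))   # 16384  (= 2**14, the old hardcoded value)
print(ray_batch_size(100))  # 65536
```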
