diff --git a/README.md b/README.md index 836e19d..b007d8d 100644 --- a/README.md +++ b/README.md @@ -66,3 +66,33 @@ cd ../../.. ``` ## Running + +Execute `zerorf.py` to run ZeroRF. + +**Zero123++ Image** + +ZeroRF can perform reconstruction on generated multi-view images for 3D content generation. +You need to prepare a segmented RGBA image in Zero123++ format (see https://github.com/SUDO-AI-3D/zero123plus). +An example can be found at `examples/ice.png`. + +```bash +python zerorf.py --load-image=examples/ice.png +``` + +The default setup requires 10GB VRAM to operate. + +**NeRF-Synthetic** + +To run general reconstruction, you can prepare the dataset in NeRF-Synthetic format. +The NeRF-Synthetic dataset itself can be obtained [here](https://drive.google.com/drive/folders/1JDdLGDruGNXWnM1eqY1FNL9PlStjaKWi). + +```bash +python zerorf.py --rep=tensorf --data-dir=path/to/nerf_synthetic --obj=hotdog --n-views=6 +``` + +The default setup requires about 16GB VRAM to operate, depending on the object. +You may want to adjust the `--n-rays-up` parameter to a lower value so it fits in your VRAM (convergence may take more steps and a longer time). + +**Configuration** + +You can find more configuration options in `opt.py`. 
diff --git a/examples/ice.png b/examples/ice.png new file mode 100644 index 0000000..e6e8ba0 Binary files /dev/null and b/examples/ice.png differ diff --git a/opt.py b/opt.py index 5bf4b61..fa80e44 100644 --- a/opt.py +++ b/opt.py @@ -20,15 +20,18 @@ def config_parser(cmd=None): # model parser.add_argument("--model-res", type=int, - default=20, help='model resolution') + default=20, help='noise resolution (should be about 1/40 the provided image resolution), ignored when load-image is set') parser.add_argument("--model-ch", type=int, - default=8, help='model channel') - parser.add_argument("--n-rays", type=int, - default=2**12, help='number of rays per batch') - parser.add_argument("--learn-bg", type=bool, - default=False, help='if learn background') + default=8, help='noise channel') + parser.add_argument("--n-rays-init", type=int, + default=2**12, help='number of rays per batch initially') + parser.add_argument("--n-rays-up", type=int, + default=2**16, help='number of rays per batch after 100 iterations') + parser.add_argument("--learn-bg", action='store_true', help='if learn background') parser.add_argument("--bg-color", type=float, default=1.0, help='background color') + parser.add_argument("--rep", type=str, choices=['dif', 'tensorf'], + default="dif", help="representation to use") # training parser.add_argument("--net-lr", type=float, diff --git a/requirements.txt b/requirements.txt index 6e5e9d3..f4a3a4b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,3 +18,4 @@ transformers diffusers[torch]>=0.17.1, <0.19.0 triton torch_redstone +configargparse diff --git a/zerorf.py b/zerorf.py index 625f564..cd52155 100644 --- a/zerorf.py +++ b/zerorf.py @@ -170,7 +170,7 @@ def kmeans_downsample(points, n_points_to_sample): code_activation=dict(type='IdentityCode'), grid_size=64, patch_size=32, - decoder=decoder_2, + decoder=decoder_2 if args.rep == 'dif' else decoder_1, decoder_use_ema=False, bg_color=1.0, pixel_loss=dict( @@ -185,8 +185,8 @@ def 
kmeans_downsample(points, n_points_to_sample): dt_gamma_scale=0.5, density_thresh=0.05, extra_scene_step=0, - n_inverse_rays=args.n_rays, - n_decoder_rays=args.n_rays, + n_inverse_rays=args.n_rays_init, + n_decoder_rays=args.n_rays_init, loss_coef=0.1 / (pic_h * pic_w), optimizer=dict(type='Adam', lr=0, weight_decay=0.), lr_scheduler=dict(type='ExponentialLR', gamma=0.99), @@ -200,7 +200,7 @@ def kmeans_downsample(points, n_points_to_sample): density_thresh=0.01, max_render_rays=pic_h * pic_w, dt_gamma_scale=0.5, - n_inverse_rays=args.n_rays, + n_inverse_rays=args.n_rays_init, loss_coef=0.1 / (pic_h * pic_w), n_inverse_steps=400, optimizer=dict(type='Adam', lr=0.0, weight_decay=0.), @@ -231,11 +231,11 @@ def kmeans_downsample(points, n_points_to_sample): prog.set_postfix(**lv) wandb.log(dict(train=lv)) if j == 50: - nerf.train_cfg['n_inverse_rays'] = 2 ** 14 - nerf.train_cfg['n_decoder_rays'] = 2 ** 14 + nerf.train_cfg['n_inverse_rays'] = round((args.n_rays_init * args.n_rays_up) ** 0.5) + nerf.train_cfg['n_decoder_rays'] = round((args.n_rays_init * args.n_rays_up) ** 0.5) if j == 100: - nerf.train_cfg['n_inverse_rays'] = 2 ** 16 if args.load_image else 2 ** 17 - nerf.train_cfg['n_decoder_rays'] = 2 ** 16 if args.load_image else 2 ** 17 + nerf.train_cfg['n_inverse_rays'] = args.n_rays_up + nerf.train_cfg['n_decoder_rays'] = args.n_rays_up if j % args.val_iter == args.val_iter - 1: cam = OrbitCamera('final', pic_w, pic_h, 3.2, 48) cache = nerf.cache[0]