From 47eceafb191d310fabcb092d6bbdf203805c9d5c Mon Sep 17 00:00:00 2001 From: Teng Wang Date: Sat, 28 Aug 2021 01:49:42 +0800 Subject: [PATCH] update readme --- README.md | 101 ++++++++++++++++--------------- eval_utils.py | 8 +++ models/deformable_transformer.py | 7 +-- models/pdvc.py | 2 - 4 files changed, 61 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index 3b1b33c..01d6409 100644 --- a/README.md +++ b/README.md @@ -37,28 +37,48 @@ cd models/ops sh make.sh ``` -# Dense Video Captioning -### PDVC -- Training -```bash -python train.py --cfg_path cfgs/anet_c3d_pdvc.yml --gpu_id ${GPU_ID} +# Performance +### Dense video captioning + +| Model | Features | config_path | Url | Recall | Precision | BLEU4 | METEOR2018 | METEOR2021 | CIDEr | SODA_c | +| ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | +| PDVC_light | C3D | cfgs/anet_c3d_pdvcl.yml | [Google Drive](https://drive.google.com/drive/folders/1JKOJrm5QMAkso-VJnzGnksIVqNYt8BSI?usp=sharing) | 55.30 | 58.42 | 1.55 | 7.13 | 7.66 | 24.80 | 5.23 | +| PDVC_light | TSN | cfgs/anet_tsn_pdvcl.yml | [Google Drive](https://drive.google.com/drive/folders/1hImJ7sXABzS-ycErruLFCE_pkWEHzFSV?usp=sharing) | 55.34 | 57.97 | 1.66 | 7.41 | 7.97 | 27.23 | 5.51 | +| PDVC | C3D | cfgs/anet_c3d_pdvc.yml | [Google Drive](https://drive.google.com/drive/folders/1I77miVvThdMenmprgozfRsXDVoc-9TxY?usp=sharing) | 55.20 | 57.36 | 1.82 | 7.48 | 8.09 | 28.16 | 5.47 | +| PDVC | TSN | cfgs/anet_tsn_pdvc.yml | [Google Drive](https://drive.google.com/drive/folders/1v2Xj0Qjt3Te_SgVyySKEofRaZsSw_rjs?usp=sharing) | 56.21 | 57.46 | 1.92 | 8.00 | 8.63 | 29.00 | 5.68 | +Notes: +* In the paper, we follow the most previous methods to use the [evaluation toolkit in ActivityNet Challenge 2018](https://github.com/ranjaykrishna/densevid_eval/tree/deba7d7e83012b218a4df888f6c971e21cfeea33). 
Note that the latest [evaluation toolkit](https://github.com/ranjaykrishna/densevid_eval/tree/9d4045aced3d827834a5d2da3c9f0692e3f33c1c) (METEOR2021) gives the same CIDEr/BLEU4 but a higher METEOR score. +* In the paper, we use an [old version of SODA_c implementation](https://github.com/fujiso/SODA/tree/22671b3570e088217139bcb1e4de7a3499c30294), while here we use an [updated version](https://github.com/fujiso/SODA/tree/9cb3e2c5a73c4e320a38c72f320b63bbef4aa798) for convenience. + +### Video paragraph captioning +| Model | Features | config_path | BLEU4 | METEOR | CIDEr | +| ---- | ---- | ---- | ---- | ---- | ---- | +| PDVC | C3D | cfgs/anet_c3d_pdvc.yml | 9.67 | 14.74 | 16.43 | +| PDVC | TSN | cfgs/anet_tsn_pdvc.yml | 10.18 | 15.96 | 20.66 | +Notes: +* Paragraph-level scores are evaluated on the ActivityNet Entity ae-val set. + +# Usage +### Dense Video Captioning +1. PDVC with learnt proposal ``` -The script will print the log and evaluate the model for every epoch. The results and logs are saved in `./save/args.id`. +# Training +config_path=cfgs/anet_c3d_pdvc.yml +python train.py --cfg_path ${config_path} --gpu_id ${GPU_ID} +# The script will evaluate the model for every epoch. The results and logs are saved in `./save`. -- Evaluation -```bash -eval_folder=anet_c3d_pdvc # the folder name you want to evaluate +# Evaluation +eval_folder=anet_c3d_pdvc # specify the folder to be evaluated python eval.py --eval_folder ${eval_folder} --eval_transformer_input_type queries --gpu_id ${GPU_ID} ``` +2. 
PDVC with gt proposals -### PDVC with gt proposals - -- Training -```bash -python train.py --cfg_path cfgs/anet_c3d_pdvc_gt.yml --gpu_id ${GPU_ID} ``` -- Evaluation -```bash +# Training +config_path=cfgs/anet_c3d_pdvc_gt.yml +python train.py --cfg_path ${config_path} --gpu_id ${GPU_ID} + +# Evaluation eval_folder=anet_c3d_pdvc_gt python eval.py --eval_folder ${eval_folder} --eval_transformer_input_type gt_proposals --gpu_id ${GPU_ID} ``` @@ -66,46 +86,27 @@ python eval.py --eval_folder ${eval_folder} --eval_transformer_input_type gt_pro # Video Paragraph Captioning +1. PDVC with learnt proposal ```bash -# PDVC with leanrt proposal for paragraph captioning +# Training +config_path=cfgs/anet_c3d_pdvc.yml +python train.py --cfg_path ${config_path} --criteria_for_best_ckpt pc --gpu_id ${GPU_ID} -## Training -python train.py --cfg_path cfgs/anet_c3d_pdvc.yml --criteria_for_best_ckpt pc --gpu_id ${GPU_ID} - -## Evaluation -eval_folder=anet_c3d_pdvc # the folder name you want to evaluate -python eval.py --eval_folder ${eval_folder} --eval_transformer_input_type queries --criteria_for_best_ckpt pc --gpu_id ${GPU_ID} - - -#PDVC with gt proposals for paragraph captioning - -##Training -python train.py --cfg_path cfgs/anet_c3d_pdvc_gt.yml --criteria_for_best_ckpt pc --gpu_id ${GPU_ID} +# Evaluation eval_folder=anet_c3d_pdvc # specify the folder to be evaluated python eval.py --eval_folder ${eval_folder} --eval_transformer_input_type queries --gpu_id ${GPU_ID} ``` +2. 
PDVC with gt proposal ``` +# Training +config_path=cfgs/anet_c3d_pdvc_gt.yml +python train.py --cfg_path ${config_path} --criteria_for_best_ckpt pc --gpu_id ${GPU_ID} -##Evaluation eval_folder=anet_c3d_pdvc_gt -python eval.py --eval_folder ${eval_folder} --eval_transformer_input_type gt_proposals --criteria_for_best_ckpt pc --gpu_id ${GPU_ID} +# Evaluation +python eval.py --eval_folder ${eval_folder} --eval_transformer_input_type gt_proposals --gpu_id ${GPU_ID} ``` - -# Performance - -| Model | Features | Url | Recall | Precision | BLEU4 | METEOR2018 | METEOR2021 | CIDEr | SODA_c | METEOR (Para-level) | -| ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | -| PDVC_light | C3D | [Google Drive](https://drive.google.com/drive/folders/1JKOJrm5QMAkso-VJnzGnksIVqNYt8BSI?usp=sharing) | 55.30 | 58.42 | 1.55 | 7.13 | 7.66 | 24.80 | 5.23 | 14.51 | -| PDVC_light | TSN | [Google Drive](https://drive.google.com/drive/folders/1hImJ7sXABzS-ycErruLFCE_pkWEHzFSV?usp=sharing) | 55.34 | 57.97 | 1.66 | 7.41 | 7.97 | 27.23 | 5.51 | 15.00 | -| PDVC | C3D | [Google Drive](https://drive.google.com/drive/folders/1I77miVvThdMenmprgozfRsXDVoc-9TxY?usp=sharing) | 55.20 | 57.36 | 1.82 | 7.48 | 8.09 | 28.16 | 5.47 | 14.71 | -| PDVC | TSN | [Google Drive](https://drive.google.com/drive/folders/1v2Xj0Qjt3Te_SgVyySKEofRaZsSw_rjs?usp=sharing) | 56.21 | 57.46 | 1.92 | 8.00 | 8.63 | 29.00 | 5.68 | 15.85 | - - -Some notes: -* In the paper, we follow the most previous methods to use the [evaluation toolkit in ActivityNet Challenge 2018](https://github.com/ranjaykrishna/densevid_eval/tree/deba7d7e83012b218a4df888f6c971e21cfeea33). Note that the latest [evluation tookit](https://github.com/ranjaykrishna/densevid_eval/tree/9d4045aced3d827834a5d2da3c9f0692e3f33c1c) (METEOR2021) gives a higher METEOR score. -* Paragraph-level METEOR is evaluated on the ActivityNet Entity ae-val set, while others are on the standard ActivityNet Captions validation set. 
- - - - - # TODO - [ ] more pretrained models - [ ] support youcook2 diff --git a/eval_utils.py b/eval_utils.py index 823f8ca..d46962a 100644 --- a/eval_utils.py +++ b/eval_utils.py @@ -2,12 +2,20 @@ from __future__ import division from __future__ import print_function +import os +import sys import collections import torch import numpy as np import json from collections import OrderedDict from tqdm import tqdm +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) from densevid_eval3.eval_soda import eval_soda from densevid_eval3.eval_para import eval_para diff --git a/models/deformable_transformer.py b/models/deformable_transformer.py index 3223d25..89bdbd0 100644 --- a/models/deformable_transformer.py +++ b/models/deformable_transformer.py @@ -23,13 +23,11 @@ class DeformableTransformer(nn.Module): def __init__(self, d_model=256, nhead=8, num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=1024, dropout=0.1, activation="relu", return_intermediate_dec=False, - num_feature_levels=4, dec_n_points=4, enc_n_points=4, two_stage_num_proposals=300): + num_feature_levels=4, dec_n_points=4, enc_n_points=4): super().__init__() self.d_model = d_model self.nhead = nhead - # self.two_stage = two_stage - self.two_stage_num_proposals = two_stage_num_proposals self.no_encoder = (num_encoder_layers == 0) self.num_feature_levels = num_feature_levels @@ -354,5 +352,4 @@ def build_deforamble_transformer(args): return_intermediate_dec=True, num_feature_levels=args.num_feature_levels, dec_n_points=args.dec_n_points, - enc_n_points=args.enc_n_points, - two_stage_num_proposals=args.num_queries) + enc_n_points=args.enc_n_points) diff --git a/models/pdvc.py b/models/pdvc.py index 3b2bd41..9445bca 100644 --- a/models/pdvc.py +++ b/models/pdvc.py @@ -95,8 +95,6 @@ def __init__(self, base_encoder, 
transformer, captioner, num_classes, num_querie self.transformer.decoder.bbox_head = None self.translator = translator - # self.rl_scorers = init_scorer(opt.rl_scorer_types, opt.cached_tokens) - # self.rl_reward_weights = {k: v for k, v in zip(opt.rl_scorer_types, opt.rl_scorer_weights)} self.disable_mid_caption_heads = opt.disable_mid_caption_heads if self.disable_mid_caption_heads: