From 47eceafb191d310fabcb092d6bbdf203805c9d5c Mon Sep 17 00:00:00 2001 From: Teng Wang Date: Sat, 28 Aug 2021 01:49:42 +0800 Subject: [PATCH] update readme --- README.md | 101 ++++++++++++++++--------------- eval_utils.py | 8 +++ models/deformable_transformer.py | 7 +-- models/pdvc.py | 2 - 4 files changed, 61 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index 3b1b33c..01d6409 100644 --- a/README.md +++ b/README.md @@ -37,28 +37,48 @@ cd models/ops sh make.sh ``` -# Dense Video Captioning -### PDVC -- Training -```bash -python train.py --cfg_path cfgs/anet_c3d_pdvc.yml --gpu_id ${GPU_ID} +# Performance +### Dense video captioning + +| Model | Features | config_path | Url | Recall | Precision | BLEU4 | METEOR2018 | METEOR2021 | CIDEr | SODA_c | +| ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | +| PDVC_light | C3D | cfgs/anet_c3d_pdvcl.yml | [Google Drive](https://drive.google.com/drive/folders/1JKOJrm5QMAkso-VJnzGnksIVqNYt8BSI?usp=sharing) | 55.30 | 58.42 | 1.55 | 7.13 | 7.66 | 24.80 | 5.23 | +| PDVC_light | TSN | cfgs/anet_tsn_pdvcl.yml | [Google Drive](https://drive.google.com/drive/folders/1hImJ7sXABzS-ycErruLFCE_pkWEHzFSV?usp=sharing) | 55.34 | 57.97 | 1.66 | 7.41 | 7.97 | 27.23 | 5.51 | +| PDVC | C3D | cfgs/anet_c3d_pdvc.yml | [Google Drive](https://drive.google.com/drive/folders/1I77miVvThdMenmprgozfRsXDVoc-9TxY?usp=sharing) | 55.20 | 57.36 | 1.82 | 7.48 | 8.09 | 28.16 | 5.47 | +| PDVC | TSN | cfgs/anet_tsn_pdvc.yml | [Google Drive](https://drive.google.com/drive/folders/1v2Xj0Qjt3Te_SgVyySKEofRaZsSw_rjs?usp=sharing) | 56.21 | 57.46 | 1.92 | 8.00 | 8.63 | 29.00 | 5.68 | +Notes: +* In the paper, we follow the most previous methods to use the [evaluation toolkit in ActivityNet Challenge 2018](https://github.com/ranjaykrishna/densevid_eval/tree/deba7d7e83012b218a4df888f6c971e21cfeea33). 
Note that the latest [evaluation toolkit](https://github.com/ranjaykrishna/densevid_eval/tree/9d4045aced3d827834a5d2da3c9f0692e3f33c1c) (METEOR2021) gives the same CIDEr/BLEU4 but a higher METEOR score. +* In the paper, we use an [old version of SODA_c implementation](https://github.com/fujiso/SODA/tree/22671b3570e088217139bcb1e4de7a3499c30294), while here we use an [updated version](https://github.com/fujiso/SODA/tree/9cb3e2c5a73c4e320a38c72f320b63bbef4aa798) for convenience. + +### Video paragraph captioning +| Model | Features | config_path | BLEU4 | METEOR | CIDEr | +| ---- | ---- | ---- | ---- | ---- | ---- | +| PDVC | C3D | cfgs/anet_c3d_pdvc.yml | 9.67 | 14.74 | 16.43 | +| PDVC | TSN | cfgs/anet_tsn_pdvc.yml | 10.18 | 15.96 | 20.66 | +Notes: +* Paragraph-level scores are evaluated on the ActivityNet Entity ae-val set. + +# Usage +### Dense Video Captioning +1. PDVC with learnt proposal ``` -The script will print the log and evaluate the model for every epoch. The results and logs are saved in `./save/args.id`. +# Training +config_path=cfgs/anet_c3d_pdvc.yml +python train.py --cfg_path ${config_path} --gpu_id ${GPU_ID} +# The script will evaluate the model for every epoch. The results and logs are saved in `./save`. -- Evaluation -```bash -eval_folder=anet_c3d_pdvc # the folder name you want to evaluate +# Evaluation +eval_folder=anet_c3d_pdvc # specify the folder to be evaluated python eval.py --eval_folder ${eval_folder} --eval_transformer_input_type queries --gpu_id ${GPU_ID} ``` +2. 
PDVC with gt proposals -### PDVC with gt proposals - -- Training -```bash -python train.py --cfg_path cfgs/anet_c3d_pdvc_gt.yml --gpu_id ${GPU_ID} ``` -- Evaluation -```bash +# Training +config_path=cfgs/anet_c3d_pdvc_gt.yml +python train.py --cfg_path ${config_path} --gpu_id ${GPU_ID} + +# Evaluation eval_folder=anet_c3d_pdvc_gt python eval.py --eval_folder ${eval_folder} --eval_transformer_input_type gt_proposals --gpu_id ${GPU_ID} ``` @@ -66,46 +86,27 @@ python eval.py --eval_folder ${eval_folder} --eval_transformer_input_type gt_pro # Video Paragraph Captioning +1. PDVC with learnt proposal ```bash -# PDVC with leanrt proposal for paragraph captioning +# Training +config_path=cfgs/anet_c3d_pdvc.yml +python train.py --cfg_path ${config_path} --criteria_for_best_ckpt pc --gpu_id ${GPU_ID} -## Training -python train.py --cfg_path cfgs/anet_c3d_pdvc.yml --criteria_for_best_ckpt pc --gpu_id ${GPU_ID} - -## Evaluation -eval_folder=anet_c3d_pdvc # the folder name you want to evaluate -python eval.py --eval_folder ${eval_folder} --eval_transformer_input_type queries --criteria_for_best_ckpt pc --gpu_id ${GPU_ID} - - -#PDVC with gt proposals for paragraph captioning - -##Training -python train.py --cfg_path cfgs/anet_c3d_pdvc_gt.yml --criteria_for_best_ckpt pc --gpu_id ${GPU_ID} +# Evaluation eval_folder=anet_c3d_pdvc # specify the folder to be evaluated python eval.py --eval_folder ${eval_folder} --eval_transformer_input_type queries --gpu_id ${GPU_ID} ``` +2. 
PDVC with gt proposal ``` +# Training +config_path=cfgs/anet_c3d_pdvc_gt.yml +python train.py --cfg_path ${config_path} --criteria_for_best_ckpt pc --gpu_id ${GPU_ID} -##Evaluation eval_folder=anet_c3d_pdvc_gt -python eval.py --eval_folder ${eval_folder} --eval_transformer_input_type gt_proposals --criteria_for_best_ckpt pc --gpu_id ${GPU_ID} +# Evaluation +python eval.py --eval_folder ${eval_folder} --eval_transformer_input_type gt_proposals --gpu_id ${GPU_ID} ``` - -# Performance - -| Model | Features | Url | Recall | Precision | BLEU4 | METEOR2018 | METEOR2021 | CIDEr | SODA_c | METEOR (Para-level) | -| ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | -| PDVC_light | C3D | [Google Drive](https://drive.google.com/drive/folders/1JKOJrm5QMAkso-VJnzGnksIVqNYt8BSI?usp=sharing) | 55.30 | 58.42 | 1.55 | 7.13 | 7.66 | 24.80 | 5.23 | 14.51 | -| PDVC_light | TSN | [Google Drive](https://drive.google.com/drive/folders/1hImJ7sXABzS-ycErruLFCE_pkWEHzFSV?usp=sharing) | 55.34 | 57.97 | 1.66 | 7.41 | 7.97 | 27.23 | 5.51 | 15.00 | -| PDVC | C3D | [Google Drive](https://drive.google.com/drive/folders/1I77miVvThdMenmprgozfRsXDVoc-9TxY?usp=sharing) | 55.20 | 57.36 | 1.82 | 7.48 | 8.09 | 28.16 | 5.47 | 14.71 | -| PDVC | TSN | [Google Drive](https://drive.google.com/drive/folders/1v2Xj0Qjt3Te_SgVyySKEofRaZsSw_rjs?usp=sharing) | 56.21 | 57.46 | 1.92 | 8.00 | 8.63 | 29.00 | 5.68 | 15.85 | - - -Some notes: -* In the paper, we follow the most previous methods to use the [evaluation toolkit in ActivityNet Challenge 2018](https://github.com/ranjaykrishna/densevid_eval/tree/deba7d7e83012b218a4df888f6c971e21cfeea33). Note that the latest [evluation tookit](https://github.com/ranjaykrishna/densevid_eval/tree/9d4045aced3d827834a5d2da3c9f0692e3f33c1c) (METEOR2021) gives a higher METEOR score. -* Paragraph-level METEOR is evaluated on the ActivityNet Entity ae-val set, while others are on the standard ActivityNet Captions validation set. 
- - - - - # TODO - [ ] more pretrained models - [ ] support youcook2 diff --git a/eval_utils.py b/eval_utils.py index 823f8ca..d46962a 100644 --- a/eval_utils.py +++ b/eval_utils.py @@ -2,12 +2,20 @@ from __future__ import division from __future__ import print_function +import os +import sys import collections import torch import numpy as np import json from collections import OrderedDict from tqdm import tqdm +from os.path import dirname, abspath + +pdvc_dir = dirname(abspath(__file__)) +sys.path.insert(0, pdvc_dir) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) +sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) from densevid_eval3.eval_soda import eval_soda from densevid_eval3.eval_para import eval_para diff --git a/models/deformable_transformer.py b/models/deformable_transformer.py index 3223d25..89bdbd0 100644 --- a/models/deformable_transformer.py +++ b/models/deformable_transformer.py @@ -23,13 +23,11 @@ class DeformableTransformer(nn.Module): def __init__(self, d_model=256, nhead=8, num_encoder_layers=6, num_decoder_layers=6, dim_feedforward=1024, dropout=0.1, activation="relu", return_intermediate_dec=False, - num_feature_levels=4, dec_n_points=4, enc_n_points=4, two_stage_num_proposals=300): + num_feature_levels=4, dec_n_points=4, enc_n_points=4): super().__init__() self.d_model = d_model self.nhead = nhead - # self.two_stage = two_stage - self.two_stage_num_proposals = two_stage_num_proposals self.no_encoder = (num_encoder_layers == 0) self.num_feature_levels = num_feature_levels @@ -354,5 +352,4 @@ def build_deforamble_transformer(args): return_intermediate_dec=True, num_feature_levels=args.num_feature_levels, dec_n_points=args.dec_n_points, - enc_n_points=args.enc_n_points, - two_stage_num_proposals=args.num_queries) + enc_n_points=args.enc_n_points) diff --git a/models/pdvc.py b/models/pdvc.py index 3b2bd41..9445bca 100644 --- a/models/pdvc.py +++ b/models/pdvc.py @@ -95,8 +95,6 @@ def __init__(self, base_encoder, 
transformer, captioner, num_classes, num_querie self.transformer.decoder.bbox_head = None self.translator = translator - # self.rl_scorers = init_scorer(opt.rl_scorer_types, opt.cached_tokens) - # self.rl_reward_weights = {k: v for k, v in zip(opt.rl_scorer_types, opt.rl_scorer_weights)} self.disable_mid_caption_heads = opt.disable_mid_caption_heads if self.disable_mid_caption_heads: