From 81b8e9d5a0c2dbf9e52df298f25c9353fbe1c9ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Richard=C2=A0Wang?= Date: Mon, 5 Dec 2022 21:10:18 -0800 Subject: [PATCH] Use detectron2 visualizer and update quickstart (#2502) * Adding tutorial for converting data to COCO format. * adding python script to get all unique classes in voc format, and dump all annotation xml files * adding docs * adding docs * refactor function name * Reformat * Reformat: single quote to double quote * Added inference from pretrained. Added Visualization * Adding visualization. Now `get_voc_format_classes` dumps class_names when labels.txt is not present. * Reformat previous commit. * Addressing issues in PR#2298. * Addressing usage of voc2coco.py * Reformat files to pass Lint Check * Deleting as_pandas. Adding default saving path. * Reformatting to pass Lint Check * Reformatting to pass Lint Check * Reformat imports * Adding save results to data frame. Deprecating `get_voc_format_classes` to using `get_detection_classes`. Refactoring saving results to utils. * Reformat for Lint * Reformat for Lint * Finalizing saving results; Refactored visualization into predictor.py. * Add running instructions * Lint check * Removing unused imports * Addressing issues in code review: deleting unused functions, refactoring visualization to a separate function call outside predict, removing prints, modifying doc strings, adding TODO to use mmdet visualization later, * removing mypy * Adding tutorials for running inference, saving results, and visualization. * fixing save_path issue * Added in index.rst instructions to install mmdet, add return detection as df by setting as_pandas flag, added quick start tutorial with downloading tiny motorbike, added warning in predict init for mmdet and mmcv-full, reorganized tutorials, * Add mmdet, mmcv-full links * Addressing issues in reviews * Removing .input * changing quick start coco_*.json to *_cocoformat.json. 
putting `mim install mmcv-full` before `pip install mmdet` * Editing visualize_detection to take as input pd.DataFrame. Fixed a bug in save results. Update tutorials * refactor text semantic search tutorial (#2375) * Minor fix to infer_speed calc (#2374) * Refactor the clip embedding tutorial (#2378) * refactor the clip embedding tutorial * address comments * Auto-switch to additive attention for FTT (#2379) * Remove build_all from platform tests (#2382) * [timeseries] Fix use of dataframe.iteritems in timeseries (#2385) * [timeseries] fix usage of pandas iteritems in timeseries * fix black and isort * [v0.6] Updated dependency versions (#2373) * Updated dependency versions * fix XGBoost slowdown by adding small min_delta to early stopping * Modified default parameters for FTT (#2386) * Modified default parameters for FTT * Modified default parameters for FTT * [Tutorial] Update AutoMM Detection Tutorial (#2380) * Updated the logic to infer rois. * remove tutorial and a minor fix * add raw_feature for hf_text * add onnx export * black * format and hp changes * black * fix get_column_features for other modality, need refactor * add export_onnx, remove index vectorization * minor fixes * black * add eval * remove hard code in get_processed_batch * remove hardcoded batch size * fix bug and refactor * black * add descriptions for functions * lint * update example and fix comments * remove extra inpit * change back config * fix label column infer in _predict * update feature extraction example * black * add test add fix several comments * black * black * lint * add onnxruntime in setup * fix * remove code for test * black * remove print in test * remove default batch test * skip onnx test for version conflict * edit setup * minor fix * change eval to evaluate * fix merge * fix merge * change cascade_rcnn_x101 to a smaller centernet in detection test * increase timeout from 15 to 120 for mim install * update centernet version * fix * change to maskrcnn * finished ft, 
testing val metric * complete val metric integration for obj detection * fix val bug * fix val_metric error * yolo voc finetune success * refactors * remove COLLECTION * refactors * refactor * refactor * black * small fic * small fix * black * lint * lint * lint * fix pred without label * fix ocr * add comments * add comments * fix merge * revert detection inference example change * fix a conflict in matcher and update a dependency for mmocr * decrease mim timeout to 60 second * import mmcv only when use * import mmcv only when use * black * black * bug fix * bug fix * update detection eval example * add todo mark * cleaner code and black * resolve several comments * change infer_label_column_type_by's name * fix typo * add todo mark * change constant map to mAP * change mmcv_model constant to mmlab_models * lint * add todos and add if det to reset_index * multi gpu training * black * add val_metric option and fix some TODOs * small refactors * black * lint * fix circular import * black * small fix * fix collate and better integrate with lightning for detection * remove unnecessary save/load * remove train_to_val and val_to_train in litmmdet * fix map * temp fix for MMOCR, need to update * enable multigpu inference * merge * lint * fix single gpu inference * add doc * skip ocr test for now * black * black * rename output_shape to num_classes * fix * add a TODO * add train from scratch for detection model * fix hardcoding for voc and coco while load coco format data * black * small fix and add voc2coco tool * update voc2coco.py * fix type * fix voc2coco * fix * fix classes hardcode and add support for two stage lr * fix single gpu eval and fix torchmetrics eval * add support for voc format input * fix a word * add preset, fix eval bug for voc * some refactors * black and small refactor * lint * fix assets loading * fix * black * add docs * open PIL safely to avoid resource warning * add fasterrcnn 2 stage lr and black * add set_num_gpus * add evaluation tutorial * 
refine documents * update ddetr * fix typo * change ref names * merge * add eval on voc and default two stage setting * finish tutorial in datasets preparation * small fix * update tutorial and codes * add EOF line and remove finetune script * black * fix typo in docs * fix wordings and add get_detection_classes, fix voc2coco, fix predict * fix type * small update * fix predict df error * fix voc2coco * add finetune tutorial and support vfnet, raise error for mask models * add voc2coco tutorial * add eof * update doc * minor * support more models and skip image size if not in config * black * update docs * add load predictor and change to problem type * update fps information * update load preditor example * fix type * update tutorial * move val_metric * add presets and fix val_metric in tuto and example * black * remove mypy in test_common * fix * change preset name * restart ci * index tutorial and fix save_path problem * fix get_num_gpu * fix save_path * fix tuto and example typo and path error * add a TODO and force detection not realtime in _predict * fix voc2coco * black * bug bash + quick start * merge * merge * fix save_path problem for DDP * black * update warning * update tutorial, lint * update tuto * update voc to coco * lint * refine index * lint * update docs in multimodal * update the multimodal index * add doc depth * fix save path init * restart ci * restart ci * fix save_path in fit * fix save_path * fix save_path * update quick start * update cli in doc * change prefix coco to postfix cocoformat for voc2coco * fix s3 path and index error * restart ci * install necessary packages * restart ci * add build mmcv in .github * fix mmlab installation * fix * fix mmcv install in tuto * fix * fix * fix index * update docs * refine quick start * remove unnecessary warnings * restart ci * replace voc2012 to 2007 * Update tutorial links (#2381) * update to raw content * trigger ci * fix inconsistencies in AutoMM tutorials (#2388) * Fix FTTransformer errors, 
add max_features (#2389) * Fix FTTransformer errors, add max_features * Hide progress bar if verbosity<=2 FTT * Create save_path in init and remove warning in fit (#2392) * create save_path in init and remove warning in fit * remove unecessary line * fix * [timeseries] Update tutorials and FAQ (#2355) Co-authored-by: Caner Turkmen * [Tutorial] Fix AutoMM detection tutorial layouts and cli tool (#2391) * minor fixes * black * add eval * remove hard code in get_processed_batch * remove hardcoded batch size * fix bug and refactor * black * add descriptions for functions * lint * update example and fix comments * remove extra inpit * change back config * fix label column infer in _predict * update feature extraction example * black * add test add fix several comments * black * black * lint * add onnxruntime in setup * fix * remove code for test * black * remove print in test * remove default batch test * skip onnx test for version conflict * edit setup * minor fix * change eval to evaluate * fix merge * fix merge * change cascade_rcnn_x101 to a smaller centernet in detection test * increase timeout from 15 to 120 for mim install * update centernet version * fix * change to maskrcnn * finished ft, testing val metric * complete val metric integration for obj detection * fix val bug * fix val_metric error * yolo voc finetune success * refactors * remove COLLECTION * refactors * refactor * refactor * black * small fic * small fix * black * lint * lint * lint * fix pred without label * fix ocr * add comments * add comments * fix merge * revert detection inference example change * fix a conflict in matcher and update a dependency for mmocr * decrease mim timeout to 60 second * import mmcv only when use * import mmcv only when use * black * black * bug fix * bug fix * update detection eval example * add todo mark * cleaner code and black * resolve several comments * change infer_label_column_type_by's name * fix typo * add todo mark * change constant map to mAP * change 
mmcv_model constant to mmlab_models * lint * add todos and add if det to reset_index * multi gpu training * black * add val_metric option and fix some TODOs * small refactors * black * lint * fix circular import * black * small fix * fix collate and better integrate with lightning for detection * remove unnecessary save/load * remove train_to_val and val_to_train in litmmdet * fix map * temp fix for MMOCR, need to update * enable multigpu inference * merge * lint * fix single gpu inference * add doc * skip ocr test for now * black * black * rename output_shape to num_classes * fix * add a TODO * add train from scratch for detection model * fix hardcoding for voc and coco while load coco format data * black * small fix and add voc2coco tool * update voc2coco.py * fix type * fix voc2coco * fix * fix classes hardcode and add support for two stage lr * fix single gpu eval and fix torchmetrics eval * add support for voc format input * fix a word * add preset, fix eval bug for voc * some refactors * black and small refactor * lint * fix assets loading * fix * black * add docs * open PIL safely to avoid resource warning * add fasterrcnn 2 stage lr and black * add set_num_gpus * add evaluation tutorial * refine documents * update ddetr * fix typo * change ref names * merge * add eval on voc and default two stage setting * finish tutorial in datasets preparation * small fix * update tutorial and codes * add EOF line and remove finetune script * black * fix typo in docs * fix wordings and add get_detection_classes, fix voc2coco, fix predict * fix type * small update * fix predict df error * fix voc2coco * add finetune tutorial and support vfnet, raise error for mask models * add voc2coco tutorial * add eof * update doc * minor * support more models and skip image size if not in config * black * update docs * add load predictor and change to problem type * update fps information * update load preditor example * fix type * update tutorial * move val_metric * add presets and 
fix val_metric in tuto and example * black * remove mypy in test_common * fix * change preset name * restart ci * index tutorial and fix save_path problem * fix get_num_gpu * fix save_path * fix tuto and example typo and path error * add a TODO and force detection not realtime in _predict * fix voc2coco * black * bug bash + quick start * merge * merge * fix save_path problem for DDP * black * update warning * update tutorial, lint * update tuto * update voc to coco * lint * refine index * lint * update docs in multimodal * update the multimodal index * add doc depth * fix save path init * restart ci * restart ci * fix save_path in fit * fix save_path * fix save_path * update quick start * update cli in doc * change prefix coco to postfix cocoformat for voc2coco * fix s3 path and index error * restart ci * install necessary packages * restart ci * add build mmcv in .github * fix mmlab installation * fix * fix mmcv install in tuto * fix * fix * fix index * update docs * refine quick start * remove unnecessary warnings * restart ci * replace voc2012 to 2007 * fix tuto and cli tool * merge * fix docs * change max depth * add all detection tuto in multimodal index * restart ci * fix max depth * Remove dummy layer (#2394) * Update constants.py * Update predictor.py * Update ner_text.py * Update ner_text.py * Update ner_text.py * add standalone test * fix * Update test_ner_standalone.py * Update test_ner.py * Update test_ner.py * fix * Update process_ner.py * Update test_ner_standalone.py * Update process_ner.py * Update fusion_mlp_image_text_tabular.yaml * Update model.py * Update process_ner.py * Update process_ner.py * Update ner_text.py * start ci Co-authored-by: Ubuntu * Fix leaderboard with static features (#2398) * [Tutorial] Knowledge Distillation tutorial (#2397) * Create model_distillation.md * Update automm_distillation_glue.py * Update automm_distillation_glue.py * update * Update tutorial * update * Update model_distillation.md * update * Fix seed * fix 
tutorial * Update model_distillation.md * Fix installation error for pycocotools package (#2400) * add eval * remove hard code in get_processed_batch * remove hardcoded batch size * fix bug and refactor * black * add descriptions for functions * lint * update example and fix comments * remove extra inpit * change back config * fix label column infer in _predict * update feature extraction example * black * add test add fix several comments * black * black * lint * add onnxruntime in setup * fix * remove code for test * black * remove print in test * remove default batch test * skip onnx test for version conflict * edit setup * minor fix * change eval to evaluate * fix merge * fix merge * change cascade_rcnn_x101 to a smaller centernet in detection test * increase timeout from 15 to 120 for mim install * update centernet version * fix * change to maskrcnn * finished ft, testing val metric * complete val metric integration for obj detection * fix val bug * fix val_metric error * yolo voc finetune success * refactors * remove COLLECTION * refactors * refactor * refactor * black * small fic * small fix * black * lint * lint * lint * fix pred without label * fix ocr * add comments * add comments * fix merge * revert detection inference example change * fix a conflict in matcher and update a dependency for mmocr * decrease mim timeout to 60 second * import mmcv only when use * import mmcv only when use * black * black * bug fix * bug fix * update detection eval example * add todo mark * cleaner code and black * resolve several comments * change infer_label_column_type_by's name * fix typo * add todo mark * change constant map to mAP * change mmcv_model constant to mmlab_models * lint * add todos and add if det to reset_index * multi gpu training * black * add val_metric option and fix some TODOs * small refactors * black * lint * fix circular import * black * small fix * fix collate and better integrate with lightning for detection * remove unnecessary save/load * remove 
train_to_val and val_to_train in litmmdet * fix map * temp fix for MMOCR, need to update * enable multigpu inference * merge * lint * fix single gpu inference * add doc * skip ocr test for now * black * black * rename output_shape to num_classes * fix * add a TODO * add train from scratch for detection model * fix hardcoding for voc and coco while load coco format data * black * small fix and add voc2coco tool * update voc2coco.py * fix type * fix voc2coco * fix * fix classes hardcode and add support for two stage lr * fix single gpu eval and fix torchmetrics eval * add support for voc format input * fix a word * add preset, fix eval bug for voc * some refactors * black and small refactor * lint * fix assets loading * fix * black * add docs * open PIL safely to avoid resource warning * add fasterrcnn 2 stage lr and black * add set_num_gpus * add evaluation tutorial * refine documents * update ddetr * fix typo * change ref names * merge * add eval on voc and default two stage setting * finish tutorial in datasets preparation * small fix * update tutorial and codes * add EOF line and remove finetune script * black * fix typo in docs * fix wordings and add get_detection_classes, fix voc2coco, fix predict * fix type * small update * fix predict df error * fix voc2coco * add finetune tutorial and support vfnet, raise error for mask models * add voc2coco tutorial * add eof * update doc * minor * support more models and skip image size if not in config * black * update docs * add load predictor and change to problem type * update fps information * update load preditor example * fix type * update tutorial * move val_metric * add presets and fix val_metric in tuto and example * black * remove mypy in test_common * fix * change preset name * restart ci * index tutorial and fix save_path problem * fix get_num_gpu * fix save_path * fix tuto and example typo and path error * add a TODO and force detection not realtime in _predict * fix voc2coco * black * bug bash + quick start 
* merge * merge * fix save_path problem for DDP * black * update warning * update tutorial, lint * update tuto * update voc to coco * lint * refine index * lint * update docs in multimodal * update the multimodal index * add doc depth * fix save path init * restart ci * restart ci * fix save_path in fit * fix save_path * fix save_path * update quick start * update cli in doc * change prefix coco to postfix cocoformat for voc2coco * fix s3 path and index error * restart ci * install necessary packages * restart ci * add build mmcv in .github * fix mmlab installation * fix * fix mmcv install in tuto * fix * fix * fix index * update docs * refine quick start * remove unnecessary warnings * restart ci * replace voc2012 to 2007 * fix tuto and cli tool * merge * fix docs * change max depth * add all detection tuto in multimodal index * restart ci * fix max depth * fix installation error for pycocotools * remove windows * Fix hyperparameters in matcher fit (#2404) * support hyperparameters in matcher fit * fix * fix * [docs] Apple Silicon Instructions (#2403) * [timeseries] make core and tabular dependencies explicit (#2405) * Turn off AutoMM prediction progress bar in tabular (#2401) * save and load enable_progress_bar * turn off progress bar for automm in tabular * Remove warnings (#2402) * replace iteritems with items * replace iteritems with items * remove seqeval warning * Update metric.py * set num_workers to 0 by default Some huggingface checkpoints such as deberta-v3, mdeberta-v3, roberta, flan-t5-xl do not work with ddp_spawn when setting num_workers=2 * update ner presets * add fixme * Update presets.py * Improve text_prediction tutorial (#2414) * fix multimodal tutorial index (#2407) * Remove warnings and duplicate function (#2409) * create save_path in init and remove warning in fit * remove unecessary line * fix * remove duplicate function * filter mmcv warning * lint * Fix cpu inference bug (#2413) * fix cpu inference bug * add test-case of cpu-only 
inference * Fix NER best quality preset (#2412) * Update presets.py * Update presets.py * Update presets.py * Update Quick Start Tutorial and Add Installation Warnings (#2418) * create save_path in init and remove warning in fit * remove unecessary line * fix * remove duplicate function * filter mmcv warning * lint * add warnings on import error, update quick start example * update quick start doc * fix wording * add installation info * fix * Remove dependency on vision.imagedataset (#2411) * reemove imagedataset * add datasets * move dataset download to utils * lint * lint * lint * Fixed model save and load path with uuid for multiple runs (#2415) * FTT preset (#2410) * Fix FTTransformer errors, add max_features * Add FTT presets * Added parallel bagging on CPU with FTTransformer * remove ultra preset, add docs * Set num_workers for image related presets and clean matcher config (#2416) * clean matcher config * num_workers * Fix bad refs in detection tutorial (#2419) * create save_path in init and remove warning in fit * remove unecessary line * fix * remove duplicate function * filter mmcv warning * lint * add warnings on import error, update quick start example * update quick start doc * fix wording * add installation info * fix * fix bad ref * [Tutorial] Shorten multilingual tutorial + Improve tutorial website (#2417) * improve document * shorten tutorial * Update index.rst * update * revise tutorial. * update tutorial * Fix typo * Adding inference quick start tutorial * Fixing YOLOv3 description * Moving mmdet mmcv to top of page * Changing {.python .input} to python .input * [0.6 Release][Object Detection][Tutorial] Remove mmdet output format (#2393) * Adding tutorial for converting data to COCO format. * adding python script to get all unique classes in voc format, and dump all annotation xml files * adding docs * adding docs * refactor function name * Reformat * Reformat: single quote to double quote * Added inference from pretrained. 
Added Visualization * Adding visualization. Now `get_voc_format_classes` dumps class_names when labels.txt is not present. * Reformat previous commit. * Addressing issues in PR#2298. * Addressing usage of voc2coco.py * Reformat files to pass Lint Check * Deleting as_pandas. Adding default saving path. * Reformatting to pass Lint Check * Reformatting to pass Lint Check * Reformat imports * Adding save results to data frame. Deprecating `get_voc_format_classes` to using `get_detection_classes`. Refactoring saving results to utils. * Reformat for Lint * Reformat for Lint * Finalizing saving results; Refactored visualization into predictor.py. * Add running instructions * Lint check * Removing unused imports * Addressing issues in code review: deleting unused functions, refactoring visualization to a separate function call outside predict, removing prints, modifying doc strings, adding TODO to use mmdet visualization later, * removing mypy * Adding tutorials for running inference, saving results, and visualization. * fixing save_path issue * Added in index.rst instructions to install mmdet, add return detection as df by setting as_pandas flag, added quick start tutorial with downloading tiny motorbike, added warning in predict init for mmdet and mmcv-full, reorganized tutorials, * Add mmdet, mmcv-full links * Addressing issues in reviews * Removing .input * changing quick start coco_*.json to *_cocoformat.json. putting `mim install mmcv-full` before `pip install mmdet` * Editing visualize_detection to take as input pd.DataFrame. Fixed a bug in save results. Update tutorials * editting tutorials to reflect pd.DataFrame detection output. * Update tutorials to print pred * Update inference quick start tutorial to single image * Update inference quick start tutorial to single image; Changing matplotlib to PIL for displaying visualization. * Update inference quick start tutorial to single image; Changing matplotlib to PIL for displaying visualization. 
* Fixing quick start import error with numpy. * Fixed links in inference tutorials, moved inference quick start to quick_start, fixed a typo in detection_eval_fasterrcnn_coco * fixing kernel died issue * Removing quick start to another PR * Remove detection load predictor * Changing {.python} to python * Removing quick start in index.rst * Update to v0.6 (#2425) * [Release 0.6] [Multimodal] Check if folder is empty before raise (#2426) * fix * fix * lint * fix * fix * Update to v0.6.1 * bump evaluate to `0.3.0` (#2433) * [Cloud] Cloud Custom Image and Cleanup (#2408) * checkpoint * checkpoint * api * cleanup old version code * fix to local * fix * fix * fix * additional volume size * fix * fix * addressing comments * fix Co-authored-by: Weisu Yin * fix (#2437) Co-authored-by: Weisu Yin * 0.6.0 Release notes (#2383) * Add finetune/eval tests for AutoMM detection (#2441) * create save_path in init and remove warning in fit * remove unecessary line * fix * add fintune/eval test for detection * lint * fix layout (#2450) * [CI][Cloud] Nightly Build of AG Images for Cloud Testing (#2436) * initial fix fix fix checkpoint checkpoint checkpoint api cleanup old version code fix to local fix fix fix additional volume size fix fix tests * fix * fix * enable nightly for cloud CI * minor fix entry point * comments Co-authored-by: Weisu Yin * Adding Joint IA3_LoRA as efficient finetuning strategy (#2451) * adding ia3_lora peft and raft#1 preset * fix * dynamic max length template choice; * fix trigger of PEFT * add coment for 11B model in preset Co-authored-by: Ubuntu * Fix AutoMM warnings about object detection (#2458) * fix warnings * ocr constant * [CI][Fair] Enable CI for Fair Module (#2460) * fair ci * fix * [Cloud][CI] Enable Cloud Lint (#2455) * lint * fix * fix * isort skip * gox * fix * fix * fix * fix Co-authored-by: Weisu Yin * adding python script to get all unique classes in voc format, and dump all annotation xml files * Reformat previous commit. 
* Adding save results to data frame. Deprecating `get_voc_format_classes` to using `get_detection_classes`. Refactoring saving results to utils. * Addressing issues in code review: deleting unused functions, refactoring visualization to a separate function call outside predict, removing prints, modifying doc strings, adding TODO to use mmdet visualization later, * Added in index.rst instructions to install mmdet, add return detection as df by setting as_pandas flag, added quick start tutorial with downloading tiny motorbike, added warning in predict init for mmdet and mmcv-full, reorganized tutorials, * Deleting inference tutorials, refactoring inference quick start to quick_start_coco.md, fixing the bug for fit->predict with dummy df converter from dict, updating detection unit test * Deleting inference tutorials, refactoring inference quick start to quick_start_coco.md, fixing the bug for fit->predict with dummy df converter from dict, updating detection unit test * Addressing comments in reviews. * Re-organizing tutorial, picking a different image to display * Deleting inference cards in the object detection index page * Adding visualizer from detectron2 * Adding visualizer from detectron2 - Complete. Updating quick_start_coco.md * Editing docs for colormap.py * Deleting unused classes. Reformatting for lint. * Reformatting for lint. 
* Adding new line * add back newline under cv2 * removing __main__ in colormap as it is not necessary Co-authored-by: Zhiqiang Tang Co-authored-by: Nick Erickson Co-authored-by: BingzhaoZhu <39958219+BingzhaoZhu@users.noreply.github.com> Co-authored-by: tonyhu Co-authored-by: Caner Turkmen Co-authored-by: Haoyang Fang <107515844+FANGAreNotGnu@users.noreply.github.com> Co-authored-by: Yi Zhu Co-authored-by: Oleksandr Shchur Co-authored-by: Shuai Zhang Co-authored-by: Ubuntu Co-authored-by: Xingjian Shi Co-authored-by: Alexander Shirkov <10080307+gradientsky@users.noreply.github.com> Co-authored-by: Weisu Yin Co-authored-by: Alexander Shirkov Co-authored-by: Leandro von Werra Co-authored-by: Weisu Yin Co-authored-by: Rami Co-authored-by: Ubuntu --- .../quick_start/quick_start_coco.md | 17 +- .../autogluon/multimodal/utils/__init__.py | 1 + .../autogluon/multimodal/utils/colormap.py | 148 ++++++ .../utils/object_detection_visualizer.py | 493 ++++++++++++++++++ 4 files changed, 650 insertions(+), 9 deletions(-) create mode 100644 multimodal/src/autogluon/multimodal/utils/colormap.py create mode 100644 multimodal/src/autogluon/multimodal/utils/object_detection_visualizer.py diff --git a/docs/tutorials/multimodal/object_detection/quick_start/quick_start_coco.md b/docs/tutorials/multimodal/object_detection/quick_start/quick_start_coco.md index 06099a4c3c9..5d57c1466c4 100644 --- a/docs/tutorials/multimodal/object_detection/quick_start/quick_start_coco.md +++ b/docs/tutorials/multimodal/object_detection/quick_start/quick_start_coco.md @@ -204,21 +204,20 @@ To run visualizations, ensure that you have `opencv` installed. 
If you haven't a To visualize the detection bounding boxes, run the following: ```python .input -from autogluon.multimodal.utils import visualize_detection +from autogluon.multimodal.utils import Visualizer conf_threshold = 0.4 # Specify a confidence threshold to filter out unwanted boxes -visualization_result_dir = "./" # Use the pwd as result dir to save the visualized image +image_result = pred.iloc[30] -visualized = visualize_detection( - pred=pred[30:31], - detection_classes=predictor.get_predictor_classes(), - conf_threshold=conf_threshold, - visualization_result_dir=visualization_result_dir, -) +img_path = image_result.image # Select an image to visualize + +visualizer = Visualizer(img_path) # Initialize the Visualizer +out = visualizer.draw_instance_predictions(image_result, conf_threshold=conf_threshold) # Draw detections +visualized = out.get_image() # Get the visualized image from PIL import Image from IPython.display import display -img = Image.fromarray(visualized[0][:, :, ::-1], 'RGB') +img = Image.fromarray(visualized, 'RGB') display(img) ``` diff --git a/multimodal/src/autogluon/multimodal/utils/__init__.py b/multimodal/src/autogluon/multimodal/utils/__init__.py index 8c572b0c2d1..d9127104127 100644 --- a/multimodal/src/autogluon/multimodal/utils/__init__.py +++ b/multimodal/src/autogluon/multimodal/utils/__init__.py @@ -57,6 +57,7 @@ save_result_voc_format, visualize_detection, ) +from .object_detection_visualizer import Visualizer from .onnx import get_onnx_input from .pipeline import init_pretrained, init_pretrained_matcher from .save import process_save_path, save_pretrained_model_configs, save_text_tokenizers, setup_save_path diff --git a/multimodal/src/autogluon/multimodal/utils/colormap.py b/multimodal/src/autogluon/multimodal/utils/colormap.py new file mode 100644 index 00000000000..6015f1b0e1b --- /dev/null +++ b/multimodal/src/autogluon/multimodal/utils/colormap.py @@ -0,0 +1,148 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+ +""" +An awesome colormap for really neat visualizations. +Copied from Detectron, and removed gray colors. +""" +import random + +import numpy as np + +__all__ = ["colormap", "random_color", "random_colors"] + +# fmt: off +# RGB: +_COLORS = np.array( + [ + 0.000, 0.447, 0.741, + 0.850, 0.325, 0.098, + 0.929, 0.694, 0.125, + 0.494, 0.184, 0.556, + 0.466, 0.674, 0.188, + 0.301, 0.745, 0.933, + 0.635, 0.078, 0.184, + 0.300, 0.300, 0.300, + 0.600, 0.600, 0.600, + 1.000, 0.000, 0.000, + 1.000, 0.500, 0.000, + 0.749, 0.749, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 1.000, + 0.667, 0.000, 1.000, + 0.333, 0.333, 0.000, + 0.333, 0.667, 0.000, + 0.333, 1.000, 0.000, + 0.667, 0.333, 0.000, + 0.667, 0.667, 0.000, + 0.667, 1.000, 0.000, + 1.000, 0.333, 0.000, + 1.000, 0.667, 0.000, + 1.000, 1.000, 0.000, + 0.000, 0.333, 0.500, + 0.000, 0.667, 0.500, + 0.000, 1.000, 0.500, + 0.333, 0.000, 0.500, + 0.333, 0.333, 0.500, + 0.333, 0.667, 0.500, + 0.333, 1.000, 0.500, + 0.667, 0.000, 0.500, + 0.667, 0.333, 0.500, + 0.667, 0.667, 0.500, + 0.667, 1.000, 0.500, + 1.000, 0.000, 0.500, + 1.000, 0.333, 0.500, + 1.000, 0.667, 0.500, + 1.000, 1.000, 0.500, + 0.000, 0.333, 1.000, + 0.000, 0.667, 1.000, + 0.000, 1.000, 1.000, + 0.333, 0.000, 1.000, + 0.333, 0.333, 1.000, + 0.333, 0.667, 1.000, + 0.333, 1.000, 1.000, + 0.667, 0.000, 1.000, + 0.667, 0.333, 1.000, + 0.667, 0.667, 1.000, + 0.667, 1.000, 1.000, + 1.000, 0.000, 1.000, + 1.000, 0.333, 1.000, + 1.000, 0.667, 1.000, + 0.333, 0.000, 0.000, + 0.500, 0.000, 0.000, + 0.667, 0.000, 0.000, + 0.833, 0.000, 0.000, + 1.000, 0.000, 0.000, + 0.000, 0.167, 0.000, + 0.000, 0.333, 0.000, + 0.000, 0.500, 0.000, + 0.000, 0.667, 0.000, + 0.000, 0.833, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 0.167, + 0.000, 0.000, 0.333, + 0.000, 0.000, 0.500, + 0.000, 0.000, 0.667, + 0.000, 0.000, 0.833, + 0.000, 0.000, 1.000, + 0.000, 0.000, 0.000, + 0.143, 0.143, 0.143, + 0.857, 0.857, 0.857, + 1.000, 1.000, 1.000 + ] +).astype(np.float32).reshape(-1, 3) 
# fmt: on


def colormap(rgb=False, maximum=255):
    """
    Return the whole palette, scaled to the requested range.

    Parameters
    ----------
    rgb (bool): whether to return RGB colors or BGR colors.
    maximum (int): either 255 or 1

    Returns
    -------
    ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1]
    """
    assert maximum in [255, 1], maximum
    # The multiplication allocates a new array, so `_COLORS` is never aliased.
    c = _COLORS * maximum
    if not rgb:
        # Reverse the channel axis: RGB -> BGR.
        c = c[:, ::-1]
    return c


def random_color(rgb=False, maximum=255):
    """
    Pick one palette entry at random (uses NumPy's global random state).

    Parameters
    ----------
    rgb (bool): whether to return RGB colors or BGR colors.
    maximum (int): either 255 or 1

    Returns
    -------
    ndarray: a vector of 3 numbers
    """
    idx = np.random.randint(0, len(_COLORS))
    ret = _COLORS[idx] * maximum
    if not rgb:
        ret = ret[::-1]
    return ret


def random_colors(N, rgb=False, maximum=255):
    """
    Pick N distinct palette rows at random (uses the stdlib `random` module).

    Parameters
    ----------
    N (int): number of unique colors needed; must not exceed the palette size.
    rgb (bool): whether to return RGB colors or BGR colors.
    maximum (int): either 255 or 1

    Returns
    -------
    list[ndarray]: N vectors of 3 numbers, one per sampled palette row

    Raises
    ------
    ValueError: if N is negative or larger than the number of palette rows.
    """
    palette_size = len(_COLORS)
    # `random.sample` would raise the same exception type with a generic
    # message; fail early with one that names the actual limit.
    if N < 0 or N > palette_size:
        raise ValueError(
            "Cannot draw {} unique colors from a palette of {} entries".format(N, palette_size)
        )
    indices = random.sample(range(palette_size), N)
    ret = [_COLORS[i] * maximum for i in indices]
    if not rgb:
        ret = [x[::-1] for x in ret]
    return ret


# ---------------------------------------------------------------------------
# Patch file boundary (original diff header, kept verbatim as comments):
# diff --git a/multimodal/src/autogluon/multimodal/utils/object_detection_visualizer.py b/multimodal/src/autogluon/multimodal/utils/object_detection_visualizer.py
# new file mode 100644
# index 00000000000..b57f0ca7828
# --- /dev/null
# +++ b/multimodal/src/autogluon/multimodal/utils/object_detection_visualizer.py
# @@ -0,0 +1,493 @@
# ---------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
# Disclaimer: Special thanks to the Detectron2 developers
# https://github.com/facebookresearch/detectron2/blob/main/detectron2/utils/visualizer.py!
# We use part of its provided, open-source functionalities.
import colorsys
import logging
from enum import Enum, unique
from typing import List

import matplotlib as mpl
import matplotlib.colors as mplc
import matplotlib.figure as mplfigure
import matplotlib.patches  # noqa: F401  (makes sure `mpl.patches` is actually loaded)
import numpy as np
import pandas as pd
from matplotlib.backends.backend_agg import FigureCanvasAgg

from .colormap import random_color

logger = logging.getLogger(__name__)

__all__ = ["ColorMode", "VisImage", "Visualizer"]


_SMALL_OBJECT_AREA_THRESH = 1000
_LARGE_MASK_AREA_THRESH = 120000
_OFF_WHITE = (1.0, 1.0, 240.0 / 255)
_BLACK = (0, 0, 0)
_RED = (1.0, 0, 0)

_KEYPOINT_THRESHOLD = 0.05


@unique
class ColorMode(Enum):
    """
    Enum of different color modes to use for instance visualizations.

    Note: in this module only `IMAGE_BW` changes what `Visualizer` draws (the
    background is converted to gray-scale); box colors are random in every mode.
    """

    IMAGE = 0
    """
    Picks a random color for every instance and overlay segmentations with low opacity.
    """
    SEGMENTATION = 1
    """
    Let instances of the same category have similar colors
    (from metadata.thing_colors), and overlay them with
    high opacity. This provides more attention on the quality of segmentation.
    """
    IMAGE_BW = 2
    """
    Same as IMAGE, but convert all areas without masks to gray-scale.
    Only available for drawing per-instance mask predictions.
    """


def _create_text_labels(classes: List[str], scores: List[float]):
    """
    Create the label tags for visualization

    Parameters
    ----------
    classes (list[str]): class names for all the detected instances, or None
    scores (list[float]): detection confidence scores for all the detected instances, or None

    Returns
    -------
    labels (list[str]): label tags for visualization. `classes` itself is returned
        when `scores` is None, and None is returned when both inputs are None.
    """
    if scores is None:
        return classes
    if classes is None:
        return ["{:.0f}%".format(score * 100) for score in scores]
    return ["{} {:.0f}%".format(name, score * 100) for name, score in zip(classes, scores)]


class VisImage:
    """Matplotlib figure wrapping one image, onto which drawings are added."""

    def __init__(self, img, scale=1.0):
        """
        Parameters
        ----------
        img (ndarray): an RGB image of shape (H, W, 3) in range [0, 255].
        scale (float): scale the input image
        """
        self.img = img
        self.scale = scale
        self.width, self.height = img.shape[1], img.shape[0]
        self._setup_figure(img)

    def _setup_figure(self, img):
        """
        Build the frameless figure, canvas and full-size axes, then paint `img`.

        Parameters
        ----------
        Same as in :meth:`__init__()`.

        Sets `self.dpi`, `self.canvas`, `self.fig` and `self.ax`; returns nothing.
        """
        fig = mplfigure.Figure(frameon=False)
        self.dpi = fig.get_dpi()
        # add a small 1e-2 to avoid precision lost due to matplotlib's truncation
        # (https://github.com/matplotlib/matplotlib/issues/15363)
        fig.set_size_inches(
            (self.width * self.scale + 1e-2) / self.dpi,
            (self.height * self.scale + 1e-2) / self.dpi,
        )
        self.canvas = FigureCanvasAgg(fig)
        # Axes cover the whole figure so image pixels map 1:1 onto the canvas.
        ax = fig.add_axes([0.0, 0.0, 1.0, 1.0])
        ax.axis("off")
        self.fig = fig
        self.ax = ax
        self.reset_image(img)

    def reset_image(self, img):
        """
        Replace the displayed background image.

        Parameters
        ----------
        img: same as in __init__
        """
        img = img.astype("uint8")
        # extent puts the origin at the top-left, matching image coordinates.
        self.ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest")

    def save(self, filepath):
        """
        Parameters
        ----------
        filepath (str): a string that contains the absolute path, including the file name, where
            the visualized image will be saved.
        """
        self.fig.savefig(filepath)

    def get_image(self):
        """
        Returns
        -------
        ndarray:
            the visualized image of shape (H, W, 3) (RGB) in uint8 type.
            The shape is scaled w.r.t the input image using the given `scale` argument.
        """
        raw, (width, height) = self.canvas.print_to_buffer()
        img_rgba = np.frombuffer(raw, dtype="uint8").reshape(height, width, 4)
        # Drop the alpha channel; astype copies, so the result owns its data.
        return img_rgba[:, :, :3].astype("uint8")


class Visualizer:
    """
    Visualizer that draws data about detection on images.

    It contains methods like `draw_{text,box}`
    that draw primitive objects to images, as well as high-level wrappers like
    `draw_{instance_predictions}` that draw composite data in some pre-defined style.

    Note that the exact visualization style for the high-level wrappers are subject to change.
    Style such as color, opacity, label contents, visibility of labels, or even the visibility
    of objects themselves (e.g. when the object is too small) may change according
    to different heuristics, as long as the results still look visually reasonable.

    To obtain a consistent style, you can implement custom drawing functions with the
    abovementioned primitive methods instead. This class does not intend to satisfy
    everyone's preference on drawing styles.

    This visualizer focuses on high rendering quality rather than performance. It is not
    designed to be used for real-time applications.
    """

    def __init__(self, img_path, scale=1.0, instance_mode=ColorMode.IMAGE):
        """
        Parameters
        ----------
        img_path (str): path of the image to draw on. It is loaded with
            `cv2.imread` and converted from BGR to RGB, since Matplotlib
            expects RGB input.
        scale (float): scale applied to the rendered image, line widths and text.
        instance_mode (ColorMode): defines one of the pre-defined style for drawing
            instances on an image.

        Raises
        ------
        ImportError: if OpenCV (`cv2`) is not installed.
        OSError: if no image can be read from `img_path`.
        """
        try:
            import cv2
        except ImportError as err:
            raise ImportError(
                "No module named: cv2. Please install cv2 by 'pip install opencv-python'"
            ) from err

        img_bgr = cv2.imread(img_path)
        if img_bgr is None:
            # cv2.imread signals a missing/undecodable file by returning None
            # instead of raising; fail here with a message that names the path.
            raise OSError("Unable to read an image from '{}'".format(img_path))
        img_rgb = img_bgr[:, :, ::-1]
        self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8)
        self.output = VisImage(self.img, scale=scale)

        # too small texts are useless, therefore clamp to 10 (before scaling)
        self._default_font_size = max(np.sqrt(self.output.height * self.output.width) // 90, 10 // scale)
        self._instance_mode = instance_mode

    @staticmethod
    def process_predictions(predictions: pd.DataFrame, conf_threshold: float = 0.4):
        """
        Process the classes, box coordinates and confidence scores of the predictions in the image

        Parameters
        ----------
        predictions (pd.DataFrame): the output of object detection with 2 attributes:
            "image": containing paths to the source image
            "bboxes": containing detection results for the images with the following format
                {"class": <predicted_class_name>, "bbox": [x1, y1, x2, y2], "score": <confidence_score>}
            Only `predictions["bboxes"]` is read, and it must iterate over such
            dicts directly (i.e. the detections of a single image).
        conf_threshold (float): detection confidence threshold to display instances

        Returns
        -------
        boxes: XYXY format of bounding boxes shape = (N, 4)
        scores: detection confidence scores, shape = (N, )
        classes: detection classes, shape = (N, )
        All three are None when no detection reaches `conf_threshold`.
        """
        kept = [inst for inst in predictions["bboxes"] if inst["score"] >= conf_threshold]
        if not kept:
            return None, None, None
        boxes = np.array([inst["bbox"] for inst in kept])
        scores = np.array([inst["score"] for inst in kept])
        classes = np.array([inst["class"] for inst in kept])
        return boxes, scores, classes

    def draw_instance_predictions(self, predictions: pd.DataFrame, conf_threshold: float = 0.4):
        """
        Draw instance-level prediction results on an image.

        Parameters
        ----------
        predictions (pd.DataFrame): the output of object detection for that image, with 2 attributes:
            "image": containing paths to the source image
            "bboxes": containing detection results for the images with the following format
                {"class": <predicted_class_name>, "bbox": [x1, y1, x2, y2], "score": <confidence_score>}
        conf_threshold (float): detection confidence threshold to display instances

        Returns
        -------
        output (VisImage): image object with visualizations.
        """
        boxes, scores, classes = self.process_predictions(predictions, conf_threshold=conf_threshold)
        labels = _create_text_labels(classes, scores)

        if self._instance_mode == ColorMode.IMAGE_BW:
            # Detection output carries no masks, so the whole background is
            # converted to gray-scale. (The upstream code queried
            # `predictions.has("pred_masks")`, which pandas objects do not
            # provide, so this mode used to fail with AttributeError.)
            self.output.reset_image(self._create_grayscale_image(None))

        self.overlay_instances(
            boxes=boxes,
            labels=labels,
            assigned_colors=None,
        )
        return self.output

    def overlay_instances(
        self,
        *,
        boxes=None,
        labels=None,
        assigned_colors=None,
    ):
        """
        Draw the visualizations

        Parameters
        ----------
        boxes (array-like): an Nx4 array of XYXY_ABS boxes for the N objects in a
            single image. Nothing is drawn when it is None or empty.
        labels (list[str]): the text to be displayed for each instance.
        assigned_colors (list[matplotlib.colors]): a list of colors, where each color
            corresponds to each box in the image. Refer to 'matplotlib.colors'
            for full list of formats that the colors are accepted in.
            Random palette colors are used when omitted.

        Returns
        -------
        output (VisImage): image object with visualizations.
        """
        if boxes is None:
            num_instances = 0
        else:
            # Accept plain nested lists as well as ndarrays.
            boxes = np.asarray(boxes)
            num_instances = len(boxes)
        if labels is not None:
            assert len(labels) == num_instances
        if num_instances == 0:
            return self.output
        if assigned_colors is None:
            assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)]

        # Display in largest to smallest order to reduce occlusion.
        areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1)
        sorted_idxs = np.argsort(-areas).tolist()
        boxes = boxes[sorted_idxs]
        if labels is not None:
            labels = [labels[k] for k in sorted_idxs]
        assigned_colors = [assigned_colors[k] for k in sorted_idxs]

        image_diag = np.sqrt(self.output.height * self.output.width)
        for i, (box, color) in enumerate(zip(boxes, assigned_colors)):
            self.draw_box(box, edge_color=color)
            if labels is None:
                continue

            x0, y0, x1, y1 = box
            text_pos = (x0, y0)  # by default, put text on the box corner.
            # for small objects, draw text at the side to avoid occlusion
            instance_area = (y1 - y0) * (x1 - x0)
            if instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale or y1 - y0 < 40 * self.output.scale:
                if y1 >= self.output.height - 5:
                    text_pos = (x1, y0)
                else:
                    text_pos = (x0, y1)

            # Taller boxes get larger text, within [0.6, 1.0] x default size.
            height_ratio = (y1 - y0) / image_diag
            lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
            font_size = np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size
            self.draw_text(
                labels[i],
                text_pos,
                color=lighter_color,
                horizontal_alignment="left",
                font_size=font_size,
            )

        return self.output

    """
    Primitive drawing functions:
    """

    def draw_text(
        self,
        text,
        position,
        *,
        font_size=None,
        color="g",
        horizontal_alignment="center",
        rotation=0,
    ):
        """
        Parameters
        ----------
        text (str): class label
        position (tuple): a tuple of the x and y coordinates to place text on image.
        font_size (int, optional): font of the text. If not provided, the
            visualizer's default font size (derived from the image size) is used.
        color: color of the text. Refer to `matplotlib.colors` for full list
            of formats that are accepted.
        horizontal_alignment (str): see `matplotlib.text.Text`
        rotation: rotation angle in degrees CCW

        Returns
        -------
        output (VisImage): image object with text drawn.
        """
        if not font_size:
            font_size = self._default_font_size

        # since the text background is dark, we don't want the text to be dark
        color = np.maximum(list(mplc.to_rgb(color)), 0.2)
        color[np.argmax(color)] = max(0.8, np.max(color))

        x, y = position
        self.output.ax.text(
            x,
            y,
            text,
            size=font_size * self.output.scale,
            family="sans-serif",
            bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"},
            verticalalignment="top",
            horizontalalignment=horizontal_alignment,
            color=color,
            zorder=10,
            rotation=rotation,
        )
        return self.output

    def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"):
        """
        Parameters
        ----------
        box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0
            are the coordinates of the box's top left corner. x1 and y1 are the
            coordinates of the box's bottom right corner.
        alpha (float): blending efficient. Smaller values lead to more transparent outlines.
        edge_color: color of the outline of the box. Refer to `matplotlib.colors`
            for full list of formats that are accepted.
        line_style (string): the string to use to create the outline of the boxes.

        Returns
        -------
        output (VisImage): image object with box drawn.
        """
        x0, y0, x1, y1 = box_coord
        linewidth = max(self._default_font_size / 4, 1)

        self.output.ax.add_patch(
            mpl.patches.Rectangle(
                (x0, y0),
                x1 - x0,
                y1 - y0,
                fill=False,
                edgecolor=edge_color,
                linewidth=linewidth * self.output.scale,
                alpha=alpha,
                linestyle=line_style,
            )
        )
        return self.output

    """
    Internal methods:
    """

    def _create_grayscale_image(self, mask=None):
        """
        Create a grayscale version of the original image.
        The colors in masked area, if given, will be kept.

        Returns a float32 (H, W, 3) array whose three channels all hold the
        per-pixel channel mean.
        """
        img_bw = self.img.astype("f4").mean(axis=2)
        img_bw = np.stack([img_bw] * 3, axis=2)
        if mask is not None:
            img_bw[mask] = self.img[mask]
        return img_bw

    def _change_color_brightness(self, color, brightness_factor):
        """
        Depending on the brightness_factor, gives a lighter or darker color i.e. a color with
        a higher or lower HLS lightness than the original color.

        Parameters
        ----------
        color: color of the polygon. Refer to `matplotlib.colors` for a full list of
            formats that are accepted.
        brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of
            0 will correspond to no change, a factor in [-1.0, 0) range will result in
            a darker color and a factor in (0, 1.0] range will result in a lighter color.

        Returns
        -------
        modified_color (tuple[double]): a tuple containing the RGB values of the
            modified color. Each value in the tuple is in the [0.0, 1.0] range.
        """
        assert brightness_factor >= -1.0 and brightness_factor <= 1.0
        hue, lightness, saturation = colorsys.rgb_to_hls(*mplc.to_rgb(color))
        # Scale lightness relative to its current value, then clamp to [0, 1].
        modified_lightness = lightness + brightness_factor * lightness
        modified_lightness = min(1.0, max(0.0, modified_lightness))
        return colorsys.hls_to_rgb(hue, modified_lightness, saturation)