Commit e2351ee (parent 908797d)
Showing 6 changed files with 32 additions and 52 deletions.
@@ -1,22 +1,32 @@
# Project Structure (to be updated)

Out-of-distribution detection with language supervision.

- Supported datasets: 'CIFAR-10', 'CIFAR-100', 'ImageNet', 'ImageNet10', 'ImageNet100', 'ImageNet-subset', 'ImageNet-dogs', 'bird200', 'car196', 'flower102', 'food101', 'pet37'
- `eval_ood_detection.py`: Performs OOD detection. Supported scores (a sketch of two of the logit-space scores follows this list):
  - 'Maha', 'knn', 'analyze' # img encoder only; feature space
  - 'energy', 'entropy', 'odin' # img->text encoder; feature space
  - 'MIP', 'MIPT', 'MIPT-wordnet', 'fingerprint', 'MIP_topk' # img->text encoder; feature space
  - 'MSP', 'energy_logits', 'odin_logits' # img encoder only; logit space
  - 'MIPCT', 'MIPCI', 'retrival', 'nouns' # text->img encoder; feature space

- `play_with_clip.py`: ID zero-shot classification and ID fine-tuning (with the image encoder). Currently there are three options:
  - evaluate zero-shot performance of CLIP: call `zero_shot_evaluation_CLIP(image_dataset_name, test_labels, ckpt)`
  - fine-tune the CLIP image encoder and test (linear probe): call `linear_probe_evaluation_CLIP(image_dataset_name)`
  - play with SkImages: call `play_with_skimage()`

- `play_with_clip.ipynb`: contains various visualization methods for the trained CLIP model.

- `captions.ipynb`: notebook used to generate captions with the Oscar model from Microsoft. This assumes you have cloned and installed [Oscar](https://github.com/microsoft/Oscar) and [scene_graph_benchmark](https://github.com/microsoft/scene_graph_benchmark) in the directory you run the notebook from (you can change these directories in the notebook).
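
As referenced in the `eval_ood_detection.py` item above, here is a minimal sketch of two of the listed logit-space scores. It follows the standard published formulas (MSP from Hendrycks & Gimpel, 2017; energy from Liu et al., 2020) and is not taken from this repository's implementation:

```python
import torch

def msp_score(logits: torch.Tensor) -> torch.Tensor:
    # Maximum softmax probability: higher means more likely in-distribution.
    return logits.softmax(dim=-1).max(dim=-1).values

def energy_score(logits: torch.Tensor, T: float = 1.0) -> torch.Tensor:
    # Negative free energy, T * logsumexp(logits / T): higher means more
    # likely in-distribution (the sign convention of Liu et al., 2020).
    return T * torch.logsumexp(logits / T, dim=-1)
```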
# Delving into OOD Detection with Vision-Language Representations

Recognizing out-of-distribution (OOD) samples is critical for machine learning systems deployed in the open world. The vast majority of OOD detection methods are driven by a single modality (e.g., either vision or language), leaving the rich information in multi-modal representations untapped. Inspired by the recent success of vision-language pre-training, this paper enriches the landscape of OOD detection from a single-modal to a multi-modal regime. Particularly, we propose Maximum Concept Matching (MCM), a simple yet effective zero-shot OOD detection method based on aligning visual features with textual concepts. We contribute in-depth analysis and theoretical insights to understand the effectiveness of MCM. Extensive experiments demonstrate that our proposed MCM achieves superior performance on a wide variety of real-world tasks. MCM with vision-language features outperforms a common baseline with pure visual features on a hard OOD task with semantically similar classes by 56.60% (FPR95).
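
To make the idea concrete, below is a minimal sketch of MCM as described above: a softmax over temperature-scaled cosine similarities between the image feature and the ID class (concept) text features, with the maximum taken as the score. It uses OpenAI's `clip` package; the model choice, prompt template, example class list, and temperature are illustrative assumptions, not values from this repository.

```python
# Minimal MCM sketch: score = max softmax of temperature-scaled cosine
# similarities between an image feature and the ID concept text features.
# Model, prompt template, class list, and temperature are illustrative.
import torch
import clip

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/16", device=device)

id_classes = ["airplane", "automobile", "bird"]  # example ID concept set
text = clip.tokenize([f"a photo of a {c}" for c in id_classes]).to(device)

@torch.no_grad()
def mcm_score(pil_image, temperature=1.0):
    image = preprocess(pil_image).unsqueeze(0).to(device)
    img_feat = model.encode_image(image).float()
    txt_feat = model.encode_text(text).float()
    img_feat = img_feat / img_feat.norm(dim=-1, keepdim=True)
    txt_feat = txt_feat / txt_feat.norm(dim=-1, keepdim=True)
    sims = img_feat @ txt_feat.T           # cosine similarity per concept
    probs = (sims / temperature).softmax(dim=-1)
    return probs.max().item()              # low score suggests OOD
```

A sample is flagged OOD when `mcm_score` falls below a threshold chosen on ID data (e.g., the operating point used for FPR95).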
# Links

ArXiv

# Environment Setup

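This section is still empty in this commit. Given the CLIP-based scripts above, a reasonable assumption is a PyTorch environment with OpenAI's CLIP package installed (`pip install git+https://github.com/openai/CLIP.git`); treat this as a placeholder until the authors pin exact requirements.
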
# Data Preparation

For complete information, refer to Appendix B.3 of the paper.

## In-distribution Datasets

- [`CUB-200`](http://www.vision.caltech.edu/datasets/cub_200_2011/), [`Stanford-Cars`](http://ai.stanford.edu/~jkrause/cars/car_dataset.html), [`Food-101`](https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/), [`Oxford-Pet`](https://www.robots.ox.ac.uk/~vgg/data/pets/)
- [`ImageNet`](https://image-net.org/challenges/LSVRC/2012/index.php#), `ImageNet-10`, `ImageNet-20`

Please download ImageNet from the link above; the other datasets are downloaded automatically when the experiments run. The default dataset location is `./datasets/`, which can be changed in `settings.yaml` (a hypothetical loading sketch follows the directory tree below). The overall file structure:

```
CLIP_OOD
|-- datasets
    |-- ImageNet
    |-- ImageNet-10
        |-- classlist.csv
    |-- ImageNet-20
        |-- classlist.csv
```

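As mentioned above, the dataset root can be resolved from `settings.yaml`. The sketch below is hypothetical: the key name `dataset_root` is an assumption, since the file's contents are not shown in this commit.

```python
# Hypothetical sketch: read the dataset root from settings.yaml with PyYAML.
# The key name "dataset_root" is an assumption; check the actual settings.yaml.
import yaml

with open("settings.yaml") as f:
    cfg = yaml.safe_load(f)

dataset_root = cfg.get("dataset_root", "./datasets/")  # README default
```
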
# Experiments
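
This section is empty in this commit; presumably experiments are launched through `eval_ood_detection.py` with one of the scores listed above, but the exact flags are not documented here.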
4 files renamed without changes.