generate_text_prompts.py
import json
import argparse

import numpy as np
from transformers import AutoTokenizer, CLIPTextModelWithProjection

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model',
        type=str,
        default='./pretrained_models/clip-vit-base-patch32-projection')
    parser.add_argument('--text',
                        type=str,
                        default='data/captions/coco_class_captions.json')
    parser.add_argument('--out', type=str, default='output.npy')
    args = parser.parse_args()

    # Load the CLIP tokenizer and text encoder with projection head.
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = CLIPTextModelWithProjection.from_pretrained(args.model)

    # Read the caption file; each entry's first element is the prompt text.
    with open(args.text) as f:
        data = json.load(f)
    texts = [x[0] for x in data]

    device = 'cuda:0'
    model.to(device)

    # Tokenize all prompts as one padded batch and move it to the GPU.
    texts = tokenizer(text=texts, return_tensors='pt', padding=True)
    texts = texts.to(device)

    # Encode the prompts and L2-normalize the projected text embeddings.
    text_outputs = model(**texts)
    txt_feats = text_outputs.text_embeds
    txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
    txt_feats = txt_feats.reshape(-1, txt_feats.shape[-1])

    # Save the embeddings as a (num_prompts, embed_dim) NumPy array.
    np.save(args.out, txt_feats.cpu().data.numpy())
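Usage note (a minimal sketch; the exact layout of data/captions/coco_class_captions.json is an assumption inferred from `texts = [x[0] for x in data]`): the script expects a JSON list whose entries are lists with the caption string first, for example

    [
        ["a photo of a person"],
        ["a photo of a bicycle"]
    ]

Running the script with its defaults (or with explicit paths, as below) writes the L2-normalized CLIP text embeddings to a .npy file of shape (num_prompts, embed_dim):

    python generate_text_prompts.py \
        --model ./pretrained_models/clip-vit-base-patch32-projection \
        --text data/captions/coco_class_captions.json \
        --out output.npy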