forked from Alpha-VLLM/Lumina-mGPT
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinference.py
116 lines (91 loc) · 3.53 KB
/
inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
from datetime import datetime
from lumina_mgpt.inference_solver import FlexARInferenceSolver
from PIL import Image
from xllmx.util.misc import random_seed
# Seed all RNGs so sampling (temperature=1.0 below) is reproducible across runs.
random_seed(0)

# ******************** Image Generation ********************
inference_solver = FlexARInferenceSolver(
    model_path="Alpha-VLLM/Lumina-mGPT-7B-768",
    precision="bf16",
    target_size=768,
)

# Text-to-image prompt. Plain string literal: the original carried an `f` prefix
# with no placeholders (ruff F541), which is misleading to readers.
q1 = (
    "Generate an image of 768x768 according to the following prompt:\n"
    "Image of a dog playing water, and a waterfall is in the background."
)

# generated: tuple of (generated response, list of generated images)
generated = inference_solver.generate(
    images=[],                 # pure text-to-image: no input images
    qas=[[q1, None]],          # [question, answer]; None answer = model generates it
    max_gen_len=8192,
    temperature=1.0,
    logits_processor=inference_solver.create_logits_processor(
        cfg=4.0, image_top_k=2000
    ),
)
# End of the image-generation stage (comment translated from Korean).
# Representative torch.cuda memory summary captured during generation:
"""
Allocated memory | 17287 MiB | 17324 MiB | 5663 GiB | 5646 GiB |
| from large pool | 17256 MiB | 17293 MiB | 5553 GiB | 5536 GiB |
| from small pool | 31 MiB | 105 MiB | 109 GiB | 109 GiB |
|---------------------------------------------------------------------------|
| Active memory | 17287 MiB | 17324 MiB | 5663 GiB | 5646 GiB |
| from large pool | 17256 MiB | 17293 MiB | 5553 GiB | 5536 GiB |
| from small pool | 31 MiB | 105 MiB | 109 GiB | 109 GiB |
|---------------------------------------------------------------------------|
| Requested memory | 17264 MiB | 17300 MiB | 5611 GiB | 5594 GiB |
| from large pool | 17232 MiB | 17269 MiB | 5502 GiB | 5485 GiB |
| from small pool | 31 MiB | 105 MiB | 108 GiB | 108 GiB
"""
# Unpack the (response text, [images]) tuple returned by generate().
a1, new_image = generated[0], generated[1][0]

# The raw model response may contain '/', newlines, or other characters that
# are illegal in file names, and can be very long (max_gen_len is 8192), so
# sanitize and truncate it before embedding it in the path. For a short,
# already-safe response the name is unchanged from the original behavior.
safe_a1 = "".join(c if (c.isalnum() or c in " ._-") else "_" for c in a1)[:100]
save_path = f"output_Alpha-VLLM/Lumina-mGPT-7B-768_output_{safe_a1}.png"
# Always ensure the output directory exists; exist_ok makes this idempotent.
# (The original only ran makedirs when the target *file* was absent, and also
# computed an unused `now = datetime.now()`, removed here.)
os.makedirs(os.path.dirname(save_path), exist_ok=True)
new_image.save(save_path)
# ******************* Image Understanding ******************
inference_solver = FlexARInferenceSolver(
    model_path="Alpha-VLLM/Lumina-mGPT-7B-512",
    precision="bf16",
    target_size=512,
)

# "<|image|>" symbol will be replaced with sequence of image tokens before fed to LLM
q1 = "Describe the image in detail. <|image|>"
images = [Image.open(save_path)]
qas = [[q1, None]]

# `len(images)` should be equal to the number of appearance of "<|image|>" in qas
generated = inference_solver.generate(
    images=images,
    qas=qas,
    max_gen_len=8192,
    temperature=1.0,
    logits_processor=inference_solver.create_logits_processor(
        cfg=4.0, image_top_k=2000
    ),
)

# generated[0] is the textual answer; print it and persist it next to the image.
a1 = generated[0]
print(a1)
# Explicit UTF-8: without it, open() uses the locale's default encoding and a
# non-ASCII model response can raise UnicodeEncodeError on some systems.
with open(f"{save_path}_description.txt", "w", encoding="utf-8") as f:
    f.write(a1)
# generated[1], namely the list of newly generated images, should typically be empty in this case.
# ********************* Omni-Potent *********************
inference_solver = FlexARInferenceSolver(
    model_path="Alpha-VLLM/Lumina-mGPT-7B-768-Omni",
    precision="bf16",
    target_size=768,
)

# Example: Depth Estimation
# For more instructions, see demos/demo_image2image.py
q1 = "Depth estimation. <|image|>"
images = [Image.open(save_path)]
qas = [[q1, None]]
generated = inference_solver.generate(
    images=images,
    qas=qas,
    max_gen_len=8192,
    temperature=1.0,
    logits_processor=inference_solver.create_logits_processor(cfg=1.0, image_top_k=200),
)

# Unpack the response text and the predicted depth image.
a1 = generated[0]
new_image = generated[1][0]
# Sanitize/truncate the response before using it as a filename (it may contain
# path separators or be extremely long), mirroring the generation section.
safe_a1 = "".join(c if (c.isalnum() or c in " ._-") else "_" for c in a1)[:100]
save_path = f"output_Alpha-VLLM/Lumina-mGPT-7B-768-Omni_{safe_a1}.png"
# Ensure the output directory exists here too: the original relied on the
# earlier generation section having created it, and crashed when run alone.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
new_image.save(save_path)