
Commit 3931f65

larryliu0820 authored and facebook-github-bot committed
Add BUCK files for llava python and C++ libs (#8297)
Summary: Add BUCK targets so the llava Python and C++ libraries can be used in fbcode, bento, etc.

Reviewed By: luyich

Differential Revision: D69278781
1 parent 8ee637e commit 3931f65

File tree

4 files changed (+46 / -2 lines)


examples/models/llava/export_llava.py
Lines changed: 1 addition & 0 deletions

@@ -67,6 +67,7 @@ def export(self) -> "LlavaEdgeManager":
             dynamic_shapes=dynamic_shape,
             strict=False,
         )
+        # pyre-ignore: Incompatible attribute type [8]: Attribute `pre_autograd_graph_module` declared in class `LLMEdgeManager` has type `Optional[GraphModule]` but is used as type `Module`.
         self.pre_autograd_graph_module = self.export_program.module()
         return self
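Most of the Python changes in this commit are the same kind of fix: a `# pyre-ignore` comment placed on the line directly above an expression suppresses the quoted Pyre error for that line only. A minimal sketch of the mechanism, using made-up names rather than anything from this diff:

from typing import Optional

class Widget:
    pass

class Gadget(Widget):
    pass

class Manager:
    # Pyre tracks this attribute as Optional[Gadget].
    item: Optional[Gadget] = None

    def set_item(self, obj: Widget) -> None:
        # pyre-ignore: Incompatible attribute type [8]: `obj` is only a `Widget`.
        self.item = obj

Without the suppression comment, Pyre flags the assignment because a `Widget` is not guaranteed to be a `Gadget`; the comment silences exactly that one error instead of loosening the attribute's declared type.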

examples/models/llava/image_util.py
Lines changed: 1 addition & 0 deletions

@@ -21,6 +21,7 @@
 logging.basicConfig(level=logging.INFO, format=FORMAT)


+# pyre-ignore: Undefined or invalid type [11]: Annotation `Image` is not defined as a type.
 def prepare_image(image: Image, target_h: int, target_w: int) -> torch.Tensor:
     """Read image into a tensor and resize the image so that it fits in
     a target_h x target_w canvas.
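For orientation, a hypothetical call site for `prepare_image` is sketched below; the file name and the 336x336 target are illustrative (the real sizes come from the image processor's crop_size), and the import path assumes the usual executorch package layout:

from PIL import Image

from executorch.examples.models.llava.image_util import prepare_image

# Illustrative only: read an RGB image and resize/pad it so it fits
# a 336 x 336 canvas, as prepare_image's docstring describes.
img = Image.open("example.jpg")
tensor = prepare_image(img, target_h=336, target_w=336)
print(tensor.shape)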

examples/models/llava/model.py
Lines changed: 20 additions & 2 deletions

@@ -48,6 +48,7 @@ def __init__(
         self.use_sdpa_with_kv_cache_op = use_sdpa_with_kv_cache_op
         self.model_ = llava_model
         self.image_processor = image_processor
+        # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `config`.
         self.vision_feature_layer = self.model_.config.vision_feature_layer
         self.vision_feature_select_strategy = (
             self.model_.config.vision_feature_select_strategy
@@ -76,6 +77,7 @@ def __init__(
         )

     def _translate_state_dict_for_text_model(self) -> Dict[str, Any]:
+        # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `language_model`.
         state_dict = self.model_.language_model.state_dict()
         key_map = {
             # fmt: off
@@ -128,9 +130,11 @@ def get_model(self):
         return self.model_.get_model()

     def embed_tokens(self, tokens: torch.Tensor) -> torch.Tensor:
+        # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `language_model`.
         return self.model_.language_model.model.embed_tokens(tokens)

     def encode_images(self, images: torch.Tensor) -> torch.Tensor:
+        # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `dtype`.
         images = images.to(dtype=self.model_.dtype)
         if type(images) is list:
             image_features = []
@@ -144,15 +148,19 @@ def encode_images(self, images: torch.Tensor) -> torch.Tensor:
                 image_feature = self._feature_select(image_forward_out).to(image.dtype)
                 image_features.append(image_feature)
         else:
+            # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `vision_tower`.
             image_forward_outs = self.model_.vision_tower(
+                # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `device`.
                 images.to(device=self.model_.device, dtype=self.model_.dtype),
                 output_hidden_states=True,
             )
             image_features = self._feature_select(image_forward_outs).to(images.dtype)
+        # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `multi_modal_projector`.
         image_features = self.model_.multi_modal_projector(image_features)
         return image_features

     def image_preprocess(self, img: torch.Tensor) -> torch.Tensor:
+        # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_vision_objects.CLIPImageProcessor` has no attribute `crop_size`.
         target_h = self.image_processor.crop_size["height"]
         target_w = self.image_processor.crop_size["width"]
         # pad the image with median rgb value, to make a square
@@ -195,10 +203,14 @@ def image_preprocess(self, img: torch.Tensor) -> torch.Tensor:
         # print(resized.shape)
         # cropped = F.center_crop(img, output_size=[w, w])
         # print(cropped.shape)
+        # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_vision_objects.CLIPImageProcessor` has no attribute `rescale_factor`.
         scaled = resized * self.image_processor.rescale_factor
         # print(scaled)
         normed = F.normalize(
-            scaled, self.image_processor.image_mean, self.image_processor.image_std
+            # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_vision_objects.CLIPImageProcessor` has no attribute `image_std`.
+            scaled,
+            self.image_processor.image_mean,
+            self.image_processor.image_std,
         )
         # print(normed)
         return normed.unsqueeze(0)
@@ -249,7 +261,9 @@ def prefill_ref(
     ) -> torch.Tensor:
         """Avoiding the torch.where() call to find <image> placeholder and insert image embedding. Taking 3 inputs instead."""
         embeds = self.prefill_embedding(prompt_before_image, images, prompt_after_image)
+        # pyre-ignore: Undefined attribute [16]: Module `transformers` has no attribute `LlamaForCausalLM`.
         return LlamaForCausalLM.forward(
+            # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `language_model`.
             self.model_.language_model,
             inputs_embeds=embeds,
             return_dict=False,
@@ -268,12 +282,16 @@ class LlavaModel(EagerModelBase):
     def __init__(self, use_sdpa_with_kv_cache_op=True, max_seq_len=768):
         self.use_sdpa_with_kv_cache_op = use_sdpa_with_kv_cache_op
         self.max_seq_len = max_seq_len
-        self.processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf")
+        self.processor = AutoProcessor.from_pretrained(
+            "llava-hf/llava-1.5-7b-hf",
+            revision="a272c74b2481d8aff3aa6fc2c4bf891fe57334fb",  # Need this for transformers >= 4.44.2
+        )
         self.tokenizer = self.processor.tokenizer
         self.image_processor = self.processor.image_processor
         self.model = LlavaForConditionalGeneration.from_pretrained(
             "llava-hf/llava-1.5-7b-hf",
             device_map="cpu",
+            revision="a272c74b2481d8aff3aa6fc2c4bf891fe57334fb",  # Need this for transformers >= 4.44.2
         )
         self.image = Image.open(
             requests.get(
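The `revision=` pin added in the last hunk is worth a note: `from_pretrained` accepts a `revision` argument naming an exact commit of the Hugging Face Hub repo, so the processor and the weights stay in sync even when the hub repo or a newer transformers release changes defaults. A minimal sketch of the same pattern; the repo name and hash are copied from the diff, the rest is generic transformers API:

from transformers import AutoProcessor, LlavaForConditionalGeneration

REPO = "llava-hf/llava-1.5-7b-hf"
REV = "a272c74b2481d8aff3aa6fc2c4bf891fe57334fb"  # pinned hub commit

# Pinning the same revision for both calls keeps the tokenizer/image
# processor and the model weights from drifting apart across releases.
# (Running this downloads the full 7B checkpoint.)
processor = AutoProcessor.from_pretrained(REPO, revision=REV)
model = LlavaForConditionalGeneration.from_pretrained(
    REPO, revision=REV, device_map="cpu"
)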

examples/models/llava/targets.bzl
Lines changed: 24 additions & 0 deletions (new file)

@@ -0,0 +1,24 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_oss_build_kwargs", "runtime")
+
+def define_common_targets():
+    runtime.cxx_binary(
+        name = "main",
+        srcs = [
+            "main.cpp",
+        ],
+        compiler_flags = ["-Wno-global-constructors"],
+        preprocessor_flags = [
+            "-DET_USE_THREADPOOL",
+        ],
+        deps = [
+            "//executorch/examples/models/llava/runner:runner",
+            "//executorch/extension/evalue_util:print_evalue",
+            "//executorch/extension/threadpool:cpuinfo_utils",
+            "//executorch/extension/threadpool:threadpool",
+        ],
+        external_deps = [
+            "gflags",
+            "torch-core-cpp",
+        ],
+        **get_oss_build_kwargs()
+    )
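`targets.bzl` only defines the rule; by the usual ExecuTorch convention a sibling `TARGETS` (internal) or `BUCK` (OSS) file loads and invokes it. A sketch of that conventional glue, assumed here rather than shown in this diff:

# TARGETS / BUCK (hypothetical): materialize the rules from targets.bzl.
load(":targets.bzl", "define_common_targets")

define_common_targets()

With that in place, the binary would build with something like `buck2 build //executorch/examples/models/llava:main`, a target path inferred from the directory and the `name = "main"` above.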
