@@ -1569,56 +1569,35 @@ def _validate_mm_placeholders(
1569
1569
"model (usually arising from an inconsistency between "
1570
1570
"`_call_hf_processor` and `_get_prompt_updates`)." )
1571
1571
1572
- def apply (
1572
def _hash_mm_items(
    self,
    mm_items: MultiModalDataItems,
    hf_processor_mm_kwargs: Mapping[str, object],
) -> dict[str, list[str]]:
    """Create MM hashes to be returned (only used in V1).

    For every modality in ``mm_items``, produce one hash string per item,
    derived from the model ID, the item itself, and the HF processor
    keyword arguments, so identical inputs map to identical keys.
    """
    # TODO: Use these hash keys for caching operations in apply_hf_processor
    # instead of rehashing.
    model_id = self.info.model_id

    hashes: dict[str, list[str]] = {}
    for modality, items in mm_items.items():
        hashes[modality] = [
            MultiModalHasher.hash_kwargs(model_id=model_id,
                                         **{modality: item},
                                         **hf_processor_mm_kwargs)
            for item in items
        ]
    return hashes
1621
1592
1593
+ def _maybe_apply_prompt_updates (
1594
+ self ,
1595
+ mm_items : MultiModalDataItems ,
1596
+ hf_processor_mm_kwargs : Mapping [str , object ],
1597
+ prompt_ids : list [int ],
1598
+ mm_kwargs : MultiModalKwargs ,
1599
+ is_update_applied : bool ,
1600
+ ) -> tuple [list [int ], str , Mapping [str , list [PlaceholderFeaturesInfo ]]]:
1622
1601
unbound_prompt_updates = self ._get_prompt_updates (
1623
1602
mm_items ,
1624
1603
hf_processor_mm_kwargs ,
@@ -1652,6 +1631,51 @@ def apply(
1652
1631
)
1653
1632
self ._validate_mm_placeholders (mm_placeholders , mm_item_counts )
1654
1633
1634
+ return prompt_ids , prompt , mm_placeholders
1635
+
1636
+ def apply (
1637
+ self ,
1638
+ prompt : Union [str , list [int ]],
1639
+ mm_data : MultiModalDataDict ,
1640
+ hf_processor_mm_kwargs : Mapping [str , object ],
1641
+ return_mm_hashes : bool = False ,
1642
+ ) -> MultiModalInputs :
1643
+ """
1644
+ Process multi-modal inputs to be used in vLLM.
1645
+
1646
+ The main steps are:
1647
+
1648
+ 1. Apply HF Processor on prompt text and multi-modal data together,
1649
+ outputting token IDs and processed tensors.
1650
+ 2. Find and update sequences in the token IDs with placeholder tokens.
1651
+ The number of placeholder tokens equals the feature size of the
1652
+ multi-modal data outputted by the multi-modal encoder.
1653
+ 3. Extract information about the placeholder tokens from the
1654
+ processed token IDs.
1655
+ """
1656
+ mm_items = self ._to_mm_items (mm_data )
1657
+
1658
+ mm_hashes = (self ._hash_mm_items (mm_items , hf_processor_mm_kwargs )
1659
+ if return_mm_hashes else None )
1660
+
1661
+ (
1662
+ prompt_ids ,
1663
+ mm_kwargs ,
1664
+ is_update_applied ,
1665
+ ) = self ._cached_apply_hf_processor (
1666
+ prompt ,
1667
+ mm_items ,
1668
+ hf_processor_mm_kwargs ,
1669
+ )
1670
+
1671
+ prompt_ids , prompt , mm_placeholders = self ._maybe_apply_prompt_updates (
1672
+ mm_items = mm_items ,
1673
+ hf_processor_mm_kwargs = hf_processor_mm_kwargs ,
1674
+ prompt_ids = prompt_ids ,
1675
+ mm_kwargs = mm_kwargs ,
1676
+ is_update_applied = is_update_applied ,
1677
+ )
1678
+
1655
1679
mm_placeholder_ranges = {
1656
1680
modality : [item .to_range () for item in placeholders ]
1657
1681
for modality , placeholders in mm_placeholders .items ()
0 commit comments