@@ -345,6 +345,44 @@ def encode(self, positive, negative, vae, width, height, length, batch_size, sta
345345 out_latent ["samples" ] = latent
346346 return (positive , negative , out_latent )
347347
class WanPhantomSubjectToVideo:
    """Condition a Wan video model on reference subject images (Phantom-style).

    Encodes up to ``length`` reference images with the VAE and appends them
    along the latent time dimension of both the positive and negative
    conditionings.  Returns three conditionings so the sampler can use
    classifier-free guidance with and without the subject images:

    - ``positive``          : text conditioning + subject latents
    - ``negative_text``     : negative text conditioning + subject latents
    - ``negative_img_text`` : negative text conditioning + zeroed subject latents
    - ``latent``            : empty Wan21 latent to start sampling from
    """

    @classmethod
    def INPUT_TYPES(s):
        return {"required": {"positive": ("CONDITIONING", ),
                             "negative": ("CONDITIONING", ),
                             "vae": ("VAE", ),
                             "width": ("INT", {"default": 832, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
                             "height": ("INT", {"default": 480, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 16}),
                             "length": ("INT", {"default": 81, "min": 1, "max": nodes.MAX_RESOLUTION, "step": 4}),
                             "batch_size": ("INT", {"default": 1, "min": 1, "max": 4096}),
                             },
                "optional": {"images": ("IMAGE", ),
                             }}

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING", "CONDITIONING", "LATENT")
    RETURN_NAMES = ("positive", "negative_text", "negative_img_text", "latent")
    FUNCTION = "encode"

    CATEGORY = "conditioning/video_models"

    def encode(self, positive, negative, vae, width, height, length, batch_size, images=None):
        # NOTE(review): `images` is an *optional* input; ComfyUI omits the kwarg
        # entirely when the socket is unconnected, so it must have a default
        # (matching sibling nodes such as `start_image=None`) or the node raises
        # TypeError.  The `is not None` guard below already expects this.
        #
        # Empty Wan21 latent: 16 channels, time compressed 4x (+1), space 8x.
        latent = torch.zeros([batch_size, 16, ((length - 1) // 4) + 1, height // 8, width // 8], device=comfy.model_management.intermediate_device())
        cond2 = negative
        if images is not None:
            # Resize the first `length` frames to the target resolution;
            # channels are moved to dim 1 for upscaling and back afterwards.
            images = comfy.utils.common_upscale(images[:length].movedim(-1, 1), width, height, "bilinear", "center").movedim(1, -1)
            # VAE-encode each frame independently (RGB only — alpha dropped),
            # then stack along the latent time dimension (dim=2).
            latent_images = [vae.encode(i.unsqueeze(0)[:, :, :, :3]) for i in images]
            concat_latent_image = torch.cat(latent_images, dim=2)

            positive = node_helpers.conditioning_set_values(positive, {"time_dim_concat": concat_latent_image})
            # cond2 keeps the subject latents on the negative text prompt;
            # `negative` gets zeroed latents (run through the Wan21 latent
            # format's process_out so the zeros are in output space).
            cond2 = node_helpers.conditioning_set_values(negative, {"time_dim_concat": concat_latent_image})
            negative = node_helpers.conditioning_set_values(negative, {"time_dim_concat": comfy.latent_formats.Wan21().process_out(torch.zeros_like(concat_latent_image))})

        out_latent = {}
        out_latent["samples"] = latent
        return (positive, cond2, negative, out_latent)
348386NODE_CLASS_MAPPINGS = {
349387 "WanImageToVideo" : WanImageToVideo ,
350388 "WanFunControlToVideo" : WanFunControlToVideo ,
@@ -353,4 +391,5 @@ def encode(self, positive, negative, vae, width, height, length, batch_size, sta
353391 "WanVaceToVideo" : WanVaceToVideo ,
354392 "TrimVideoLatent" : TrimVideoLatent ,
355393 "WanCameraImageToVideo" : WanCameraImageToVideo ,
394+ "WanPhantomSubjectToVideo" : WanPhantomSubjectToVideo ,
356395}
0 commit comments