diff --git a/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/auto/auto_model.py b/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/auto/auto_model.py
index d74a1ecc4588..a283bb7b46fe 100644
--- a/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/auto/auto_model.py
+++ b/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/auto/auto_model.py
@@ -49,6 +49,11 @@
 except:
     flash_attention = None
 
+try:
+    from paddle.jit.api import set_dynamic_shape
+except:
+    from paddle.jit.dy2static.utils_helper import set_dynamic_shape
+
 def shard_op_for_sequence_parallel_linear(tgt, mesh):
     # FIXME Hack to shard op for module (linear)
     # we only shard the second to the last op (matmul) leave the last op (elementwise_add) un-touched
@@ -1206,7 +1211,7 @@ def _post_process_(outputs, input_ids, cur_len, origin_len, scores, unfinished_f
 
         attn_mask = model_kwargs["attention_mask"]
         # make the shape of attention_mask = (-1, -1, -1, -1) in dy2static.
-        paddle.jit.dy2static.utils_helper.set_dynamic_shape(model_kwargs["attention_mask"], [-1, -1, -1, -1])
+        set_dynamic_shape(model_kwargs["attention_mask"], [-1, -1, -1, -1])
         model_kwargs["cache"] = outputs[1] if isinstance(outputs, tuple) else None
         max_length = paddle.to_tensor(max_length)
         while cur_len < max_length:
diff --git a/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py b/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py
index 0d789d72f3ec..ae955581cbb3 100644
--- a/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py
+++ b/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/hybrid_model.py
@@ -62,11 +62,17 @@
     from paddle.nn.functional.flash_attention import flash_attention
 except:
     flash_attention = None
+
 try:
     from paddle.incubate.nn.layer.fused_dropout_add import FusedDropoutAdd
 except:
     FusedDropoutAdd = None
 
+try:
+    from paddle.jit.api import set_dynamic_shape
+except:
+    from paddle.jit.dy2static.utils_helper import set_dynamic_shape
+
 def get_attr(layer, name):
     if getattr(layer, name, None) is not None:
         return getattr(layer, name, None)
@@ -1501,7 +1507,7 @@ def _post_process_(outputs, input_ids, cur_len, origin_len, scores, unfinished_f
 
         attn_mask = model_kwargs["attention_mask"]
         # make the shape of attention_mask = (-1, -1, -1, -1) in dy2static.
-        paddle.jit.dy2static.utils_helper.set_dynamic_shape(model_kwargs["attention_mask"], [-1, -1, -1, -1])
+        set_dynamic_shape(model_kwargs["attention_mask"], [-1, -1, -1, -1])
         model_kwargs["cache"] = outputs[1] if isinstance(outputs, tuple) else None
         while cur_len < max_length:
             # Note(GuoxiaWang): Remove outputs = _forward_(**model_kwargs)
diff --git a/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/single_model.py b/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/single_model.py
index bfe00cbc7419..80ca22b855ca 100644
--- a/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/single_model.py
+++ b/model_zoo/gpt-3/ppfleetx/models/language_model/gpt/dygraph/single_model.py
@@ -43,6 +43,10 @@
 except:
     flash_attention = None
 
+try:
+    from paddle.jit.api import set_dynamic_shape
+except:
+    from paddle.jit.dy2static.utils_helper import set_dynamic_shape
 
 def get_attr(layer, name):
     if getattr(layer, name, None) is not None:
@@ -1077,7 +1081,7 @@ def _post_process_(outputs, input_ids, cur_len, origin_len, scores, unfinished_f
 
         attn_mask = model_kwargs["attention_mask"]
         # make the shape of attention_mask = (-1, -1, -1, -1) in dy2static.
-        paddle.jit.dy2static.utils_helper.set_dynamic_shape(model_kwargs["attention_mask"], [-1, -1, -1, -1])
+        set_dynamic_shape(model_kwargs["attention_mask"], [-1, -1, -1, -1])
         model_kwargs["cache"] = outputs[1] if isinstance(outputs, tuple) else None
         if hasattr(paddle.framework, "_no_check_dy2st_diff"):
             # TODO(wanghuancoder): _no_check_dy2st_diff is used to turn off the checking of behavior
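For reference, below is a minimal, self-contained sketch of the compatibility shim this patch introduces in all three files. The try/except import and the `set_dynamic_shape(..., [-1, -1, -1, -1])` call are taken directly from the diff; the `mark_attention_mask_dynamic` helper is hypothetical, added only to show how the call sites use the imported symbol outside the full model code.

```python
# Compatibility import, mirroring the patch: newer Paddle builds expose
# set_dynamic_shape from paddle.jit.api, while older ones keep it in
# paddle.jit.dy2static.utils_helper. Resolving it once at import time lets
# every call site use the bare name regardless of the installed version.
try:
    from paddle.jit.api import set_dynamic_shape
except:
    from paddle.jit.dy2static.utils_helper import set_dynamic_shape


def mark_attention_mask_dynamic(model_kwargs):
    # Hypothetical wrapper around the call sites in the diff: mark every
    # dimension of attention_mask as dynamic (-1) so dy2static tracing does
    # not specialize the program to one concrete attention-mask shape.
    set_dynamic_shape(model_kwargs["attention_mask"], [-1, -1, -1, -1])
```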