diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py
index 3b7dfac88a2f4e..12981d0b98aeaf 100755
--- a/python/paddle/distributed/fleet/base/distributed_strategy.py
+++ b/python/paddle/distributed/fleet/base/distributed_strategy.py
@@ -776,14 +776,13 @@ def sparse_optimizer_config(sgd, strategy, prefix):
def set_sparse_table_config(table_data, config):
for key in config:
if key not in support_sparse_key_list:
- raise ValueError("strategy key '%s' not support" % (key))
+ raise ValueError(f"strategy key '{key}' not support")
table_class = config.get(
"sparse_table_class", "DownpourSparseTable"
)
if table_class not in support_sparse_table_class:
raise ValueError(
- "support sparse_table_class: ['DownpourSparseTable, DownpourSparseSSDTable'], but actual %s"
- % (table_class)
+ f"support sparse_table_class: ['DownpourSparseTable, DownpourSparseSSDTable'], but actual {table_class}"
)
if table_class == "DownpourSparseSSDTable":
table_data.table_class = 'SSDSparseTable'
@@ -806,8 +805,7 @@ def set_sparse_table_config(table_data, config):
)
if accessor_class not in support_sparse_accessor_class:
raise ValueError(
- "support sparse_accessor_class: ['DownpourSparseValueAccessor', 'DownpourCtrAccessor', 'DownpourCtrDoubleAccessor', 'DownpourUnitAccessor', 'DownpourDoubleUnitAccessor', 'DownpourCtrDymfAccessor'], but actual %s"
- % (accessor_class)
+ f"support sparse_accessor_class: ['DownpourSparseValueAccessor', 'DownpourCtrAccessor', 'DownpourCtrDoubleAccessor', 'DownpourUnitAccessor', 'DownpourDoubleUnitAccessor', 'DownpourCtrDymfAccessor'], but actual {accessor_class}"
)
if accessor_class.find("Double") >= 0:
diff --git a/python/paddle/distributed/fleet/base/graphviz.py b/python/paddle/distributed/fleet/base/graphviz.py
index 5e0b0c186b9b8c..44483483d7c4cf 100644
--- a/python/paddle/distributed/fleet/base/graphviz.py
+++ b/python/paddle/distributed/fleet/base/graphviz.py
@@ -21,7 +21,7 @@
def crepr(v):
if isinstance(v, str):
- return '"%s"' % v
+ return f'"{v}"'
return str(v)
@@ -255,7 +255,7 @@ def add_op(self, opType, **kwargs):
highlight = kwargs['highlight']
del kwargs['highlight']
return self.graph.node(
- "<%s>" % opType,
+ f"<{opType}>",
prefix="op",
description=opType,
shape="box",
diff --git a/python/paddle/distributed/fleet/base/strategy_group.py b/python/paddle/distributed/fleet/base/strategy_group.py
index 9e2b5a30ea2876..86870beb917e75 100644
--- a/python/paddle/distributed/fleet/base/strategy_group.py
+++ b/python/paddle/distributed/fleet/base/strategy_group.py
@@ -115,7 +115,7 @@ def __repr__(self):
if not self.list_of_group:
return debug_str + "No group."
for i in range(len(self.list_of_group)):
- debug_str += f"Group[{i}]: {str(self.list_of_group[i])}; "
+ debug_str += f"Group[{i}]: {self.list_of_group[i]}; "
return debug_str
diff --git a/python/paddle/distributed/fleet/base/util_factory.py b/python/paddle/distributed/fleet/base/util_factory.py
index 7eeb9dc027dc33..2a9142fbfcc6b6 100755
--- a/python/paddle/distributed/fleet/base/util_factory.py
+++ b/python/paddle/distributed/fleet/base/util_factory.py
@@ -404,8 +404,7 @@ def _proto_check(self, config):
train_prog_var = train_prog.global_block().var(var_name)
except ValueError as e:
print(
- "Not find variable '%s' in train program. please check pruning."
- % var_name
+ f"Not find variable '{var_name}' in train program. please check pruning."
)
is_match = False
continue
@@ -669,7 +668,7 @@ def check_not_expected_ops(prog, not_expected_op_types):
return_numpy=return_numpy,
)
for i, v in enumerate(fetch_list):
- print("fetch_targets name: %s" % v.name)
+ print(f"fetch_targets name: {v.name}")
print(f"fetch_targets: {results[i]}")
return results
diff --git a/python/paddle/distributed/fleet/data_generator/data_generator.py b/python/paddle/distributed/fleet/data_generator/data_generator.py
index 7963128f2c6d9b..15c0d0b26463a8 100644
--- a/python/paddle/distributed/fleet/data_generator/data_generator.py
+++ b/python/paddle/distributed/fleet/data_generator/data_generator.py
@@ -318,10 +318,10 @@ def _gen_str(self, line):
for item in line:
name, elements = item
if not isinstance(name, str):
- raise ValueError("name%s must be in str type" % type(name))
+ raise ValueError(f"name{type(name)} must be in str type")
if not isinstance(elements, list):
raise ValueError(
- "elements%s must be in list type" % type(elements)
+ f"elements{type(elements)} must be in list type"
)
if not elements:
raise ValueError(
@@ -336,8 +336,7 @@ def _gen_str(self, line):
self._proto_info[-1] = (name, "float")
elif not isinstance(elem, int):
raise ValueError(
- "the type of element%s must be in int or float"
- % type(elem)
+ f"the type of element{type(elem)} must be in int or float"
)
output += " " + str(elem)
else:
@@ -348,10 +347,10 @@ def _gen_str(self, line):
for index, item in enumerate(line):
name, elements = item
if not isinstance(name, str):
- raise ValueError("name%s must be in str type" % type(name))
+ raise ValueError(f"name{type(name)} must be in str type")
if not isinstance(elements, list):
raise ValueError(
- "elements%s must be in list type" % type(elements)
+ f"elements{type(elements)} must be in list type"
)
if not elements:
raise ValueError(
@@ -370,8 +369,7 @@ def _gen_str(self, line):
self._proto_info[index] = (name, "float")
elif not isinstance(elem, int):
raise ValueError(
- "the type of element%s must be in int or float"
- % type(elem)
+ f"the type of element{type(elem)} must be in int or float"
)
output += " " + str(elem)
return output + "\n"
diff --git a/python/paddle/distributed/fleet/dataset/dataset.py b/python/paddle/distributed/fleet/dataset/dataset.py
index 3c2748fc78c4a1..e6c67dcebd5c9d 100755
--- a/python/paddle/distributed/fleet/dataset/dataset.py
+++ b/python/paddle/distributed/fleet/dataset/dataset.py
@@ -315,8 +315,7 @@ def _check_use_var_with_data_generator(
for i, ele in enumerate(user_parsed_line):
if len(ele[1]) == 0:
raise ValueError(
- "var length error: var %s's length in data_generator is 0"
- % ele[0]
+ f"var length error: var {ele[0]}'s length in data_generator is 0"
)
if var_list[i].dtype == paddle.float32 and not all(
diff --git a/python/paddle/distributed/fleet/fleet.py b/python/paddle/distributed/fleet/fleet.py
index 39a72aa1182f1a..c391d3556bbd7b 100755
--- a/python/paddle/distributed/fleet/fleet.py
+++ b/python/paddle/distributed/fleet/fleet.py
@@ -83,8 +83,7 @@ def __impl__(*args, **kwargs):
and cls._role_maker._is_non_distributed() is True
):
logger.warning(
- "%s() function doesn't work when use non_distributed fleet."
- % (func.__name__)
+ f"{func.__name__}() function doesn't work when use non_distributed fleet."
)
return
diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py
index 31e117a8ef5b2c..47ae5dc7d2b66b 100755
--- a/python/paddle/distributed/fleet/launch_utils.py
+++ b/python/paddle/distributed/fleet/launch_utils.py
@@ -291,7 +291,7 @@ def get_cluster(
trainer.accelerators.extend(devices_per_proc[i])
else:
trainer.accelerators.append(devices_per_proc[i])
- trainer.endpoint = "%s" % (cur_node_endpoints[i])
+ trainer.endpoint = f"{cur_node_endpoints[i]}"
trainer.rank = trainer_rank
trainer_rank += 1
@@ -498,7 +498,7 @@ def start_local_trainers(
for idx, t in enumerate(pod.trainers):
proc_env = {
"PADDLE_TRAINER_ID": "%d" % t.rank,
- "PADDLE_CURRENT_ENDPOINT": "%s" % t.endpoint,
+ "PADDLE_CURRENT_ENDPOINT": f"{t.endpoint}",
"PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(),
"PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints()),
"PADDLE_RANK_IN_NODE": str(idx),
@@ -523,18 +523,18 @@ def start_local_trainers(
]
if len(t.accelerators) > 0 and pod.device_mode == DeviceMode.GPU:
- proc_env["FLAGS_selected_gpus"] = "%s" % ",".join(
- [str(g) for g in t.accelerators]
+ proc_env["FLAGS_selected_gpus"] = "{}".format(
+ ",".join([str(g) for g in t.accelerators])
)
if len(t.accelerators) > 0:
- proc_env["FLAGS_selected_accelerators"] = "%s" % ",".join(
- [str(g) for g in t.accelerators]
+ proc_env["FLAGS_selected_accelerators"] = "{}".format(
+ ",".join([str(g) for g in t.accelerators])
)
# to do: same code style in future
if framework.core.is_compiled_with_xpu() and len(t.accelerators) > 0:
- proc_env["FLAGS_selected_xpus"] = "%s" % ",".join(
- [str(g) for g in t.accelerators]
+ proc_env["FLAGS_selected_xpus"] = "{}".format(
+ ",".join([str(g) for g in t.accelerators])
)
current_env.update(proc_env)
@@ -571,9 +571,9 @@ def start_local_trainers(
pre_fn = None if os.name == 'nt' else os.setsid
if log_dir is not None:
os.makedirs(log_dir, exist_ok=True)
- if os.path.exists("%s/endpoints.log" % log_dir):
+ if os.path.exists(f"{log_dir}/endpoints.log"):
os.remove(f"{log_dir}/endpoints.log")
- with open("%s/endpoints.log" % log_dir, "w") as f:
+ with open(f"{log_dir}/endpoints.log", "w") as f:
f.write("PADDLE_TRAINER_ENDPOINTS: \n")
f.write("\n".join(cluster.trainers_endpoints()))
if (
@@ -613,8 +613,7 @@ def pull_worker_log(tp):
except UnicodeEncodeError:
sys.stdout.write(
'UnicodeEncodeError occurs at this line. '
- 'Please refer to the original log file "%s"\n'
- % tp.log_fn.name
+ f'Please refer to the original log file "{tp.log_fn.name}"\n'
)
tp.log_offset = fin.tell()
@@ -883,7 +882,7 @@ def get_mapped_cluster_without_rank_mapping(
assert len(ranks_per_node) == 1
for i in range(len(ranks_per_node)):
trainer = Trainer()
- trainer.endpoint = "%s" % (cur_node_endpoints[i])
+ trainer.endpoint = f"{cur_node_endpoints[i]}"
trainer.rank = ranks_per_node[i]
pod.trainers.append(trainer)
cluster.pods.append(pod)
@@ -1002,7 +1001,7 @@ def get_relative_gpu_id(gpu_id):
trainer.accelerators.append(
get_relative_gpu_id(local_device_ids[0])
)
- trainer.endpoint = "%s" % (cur_node_endpoints[i])
+ trainer.endpoint = f"{cur_node_endpoints[i]}"
trainer.rank = ranks_per_node[i]
pod.trainers.append(trainer)
cluster.pods.append(pod)
@@ -1936,7 +1935,7 @@ def check_backend(backend):
"paddle.distributed initialize error, "
"backend argument can only be one of "
"'nccl', 'gloo', 'bkcl', 'auto', 'heter', 'xccl' "
- "but got %s" % backend
+ f"but got {backend}"
)
if backend == 'nccl' and not framework.core.is_compiled_with_cuda():
diff --git a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py
index cee43657c75275..e2f6503e356599 100644
--- a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py
@@ -82,10 +82,9 @@ def __init__(
raise TypeError(
"The type of grad_clip should be 'ClipGradByNorm', because DGCMomentumOptimizer only support ClipGradByNorm"
)
- assert isinstance(num_trainers, int), (
- "The type of num_trainers should be 'int', but received %s"
- % type(num_trainers)
- )
+ assert isinstance(
+ num_trainers, int
+ ), f"The type of num_trainers should be 'int', but received {type(num_trainers)}"
assert (
num_trainers > 0
), "The value of num_trainers should be greater than 0!"
diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py
index 3ce159385fa5a4..a11cb12be74e9a 100644
--- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py
@@ -288,8 +288,7 @@ def get_sys_free_mem():
return free
else:
raise ValueError(
- "%s platform is unsupported is parameter server optimizer"
- % (platform.system())
+ f"{platform.system()} platform is unsupported is parameter server optimizer"
)
if not isinstance(self.inner_opt, paddle.optimizer.SGD):
@@ -334,8 +333,7 @@ def get_sys_free_mem():
if x < 0:
if neg_dim_count >= 1:
raise ValueError(
- "Var %s has more than one negative dim."
- % (var_name)
+ f"Var {var_name} has more than one negative dim."
)
neg_dim_count += 1
data_count *= -x
diff --git a/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py
index 9ef2471708e286..7475af50b44414 100755
--- a/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py
@@ -208,8 +208,7 @@ def get_sys_free_mem():
return free
else:
raise ValueError(
- "%s platform is unsupported is parameter server optimizer"
- % (platform.system())
+ f"{platform.system()} platform is unsupported is parameter server optimizer"
)
if not isinstance(self.inner_opt, paddle.optimizer.SGD):
@@ -248,8 +247,7 @@ def get_sys_free_mem():
if x < 0:
if neg_dim_count >= 1:
raise ValueError(
- "Var %s has more than one negative dim."
- % (var_name)
+ f"Var {var_name} has more than one negative dim."
)
neg_dim_count += 1
data_count *= -x
diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
index dfdeef1a341c02..c0a51d0d4e04b6 100755
--- a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
@@ -132,7 +132,7 @@ def _get_sharding_segment_strategy(self):
self._forward_remain_anchors = []
else:
raise NotImplementedError(
- f"the sharding segment strategy [{str(segment_strategy)}] is not implemented"
+ f"the sharding segment strategy [{segment_strategy}] is not implemented"
)
self._sharding_segment_strategy = segment_strategy
diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py
index ff7730c7729047..40ca84cccb2686 100755
--- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py
+++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py
@@ -631,7 +631,7 @@ def _print_segmentation_for_debug(self):
)
for index, layer in enumerate(self._layers_desc[start:end]):
- logger.info(f"{index + start}: {str(layer)}")
+ logger.info(f"{index + start}: {layer}")
if self._num_virtual_pipeline_stages > 1:
for stage in range(self._num_stages):
diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/four_directions_p2p_communication.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/four_directions_p2p_communication.py
index 84473ef7ddbbba..bd50e80d7d965b 100644
--- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/four_directions_p2p_communication.py
+++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/four_directions_p2p_communication.py
@@ -67,8 +67,8 @@ def initialize_p2p_groups(
) = _hcg.get_p2p_groups()
debug_str = (
- f"P2pInfo: send_next_group: {repr(send_next_group)}, send_prev_group: {repr(send_prev_group)}, "
- f"recv_next_group: {repr(recv_next_group)}, recv_prev_group: {repr(recv_prev_group)}"
+ f"P2pInfo: send_next_group: {send_next_group!r}, send_prev_group: {send_prev_group!r}, "
+ f"recv_next_group: {recv_next_group!r}, recv_prev_group: {recv_prev_group!r}"
)
logger.info(debug_str)
diff --git a/python/paddle/distributed/fleet/runtime/the_one_ps.py b/python/paddle/distributed/fleet/runtime/the_one_ps.py
index 38829816877152..0f9137f02cbf9d 100644
--- a/python/paddle/distributed/fleet/runtime/the_one_ps.py
+++ b/python/paddle/distributed/fleet/runtime/the_one_ps.py
@@ -472,11 +472,11 @@ def __init__(self):
def to_string(self, indent):
program_str = "{}tensor {{{}\n{}}}"
attrs = ""
- attrs += f"feed_var_name: \"{str(self.feed_var_name)}\" "
- attrs += f"fetch_var_name: \"{str(self.fetch_var_name)}\" "
- attrs += f"startup_program_id: {str(self.startup_program_id)} "
- attrs += f"main_program_id: {str(self.main_program_id)} "
- attrs += f"tensor_table_class: \"{str(self.tensor_table_class)}\" "
+ attrs += f"feed_var_name: \"{self.feed_var_name}\" "
+ attrs += f"fetch_var_name: \"{self.fetch_var_name}\" "
+ attrs += f"startup_program_id: {self.startup_program_id} "
+ attrs += f"main_program_id: {self.main_program_id} "
+ attrs += f"tensor_table_class: \"{self.tensor_table_class}\" "
attrs += "\n"
return program_str.format(
conv_indent(indent), attrs, conv_indent(indent)
diff --git a/python/paddle/distributed/fleet/utils/fs.py b/python/paddle/distributed/fleet/utils/fs.py
index 681fcdb67eb0ec..15018824286d17 100644
--- a/python/paddle/distributed/fleet/utils/fs.py
+++ b/python/paddle/distributed/fleet/utils/fs.py
@@ -481,7 +481,7 @@ def __init__(
sleep_inter=1000,
): # ms
self.pre_commands = []
- hadoop_bin = '%s/bin/hadoop' % hadoop_home
+ hadoop_bin = f'{hadoop_home}/bin/hadoop'
self.pre_commands.append(hadoop_bin)
dfs = 'fs'
self.pre_commands.append(dfs)
@@ -1216,7 +1216,7 @@ def list_files_info(self, path_list):
)
ret, lines = self._run_cmd(cmd)
if len(lines) == 0:
- logger.warning("list_files empty, path[%s]" % path_list)
+ logger.warning(f"list_files empty, path[{path_list}]")
return []
for line in lines:
arr = line.split(' ')
diff --git a/python/paddle/distributed/fleet/utils/log_util.py b/python/paddle/distributed/fleet/utils/log_util.py
index 9f0a6d1f77f691..a8c5c525bb3a3b 100644
--- a/python/paddle/distributed/fleet/utils/log_util.py
+++ b/python/paddle/distributed/fleet/utils/log_util.py
@@ -67,8 +67,6 @@ def layer_to_str(base, *args, **kwargs):
if kwargs:
name += ", "
if kwargs:
- name += ", ".join(
- f"{key}={str(value)}" for key, value in kwargs.items()
- )
+ name += ", ".join(f"{key}={value}" for key, value in kwargs.items())
name += ")"
return name
diff --git a/python/paddle/distributed/fleet/utils/pp_parallel_adaptor.py b/python/paddle/distributed/fleet/utils/pp_parallel_adaptor.py
index e3970ce9364014..0b67a3c8845002 100644
--- a/python/paddle/distributed/fleet/utils/pp_parallel_adaptor.py
+++ b/python/paddle/distributed/fleet/utils/pp_parallel_adaptor.py
@@ -109,7 +109,7 @@ def apply(self, src_model_path: str, dst_model_path: str):
# first rank extract shared layer
with_shared = True
for dir in src_dirs:
- print("extract layer params in dir %s" % dir)
+ print(f"extract layer params in dir {dir}")
layers.extend(self.extract_layers(dir, with_shared))
with_shared = False
# 2、sort and unique layers
@@ -240,7 +240,7 @@ def priority(elem):
return float(match.group(1).lstrip("."))
# strictly sort layers
- print("before sort %s" % ("|".join([e[0] for e in layers])))
+ print("before sort {}".format("|".join([e[0] for e in layers])))
layers.sort(key=priority)
# unique
unique_layers = []
@@ -248,7 +248,7 @@ def priority(elem):
if unique_layers and e[0] == unique_layers[-1][0]:
continue
unique_layers.append(e)
- print("after sort %s " % ("|".join([e[0] for e in unique_layers])))
+ print("after sort {} ".format("|".join([e[0] for e in unique_layers])))
return unique_layers
def segment_layers(
@@ -358,7 +358,7 @@ def merge(src, dst, map_k=None):
lr_scheduler = None
for layer_names, file_path in layers_segment:
- print("load %s" % file_path)
+ print(f"load {file_path}")
layer = paddle.load(file_path)
def get_param_name_mapper(layer_name):