diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py
index 3b7dfac88a2f4e..12981d0b98aeaf 100755
--- a/python/paddle/distributed/fleet/base/distributed_strategy.py
+++ b/python/paddle/distributed/fleet/base/distributed_strategy.py
@@ -776,14 +776,13 @@ def sparse_optimizer_config(sgd, strategy, prefix):
         def set_sparse_table_config(table_data, config):
             for key in config:
                 if key not in support_sparse_key_list:
-                    raise ValueError("strategy key '%s' not support" % (key))
+                    raise ValueError(f"strategy key '{key}' not support")
             table_class = config.get(
                 "sparse_table_class", "DownpourSparseTable"
             )
             if table_class not in support_sparse_table_class:
                 raise ValueError(
-                    "support sparse_table_class: ['DownpourSparseTable, DownpourSparseSSDTable'], but actual %s"
-                    % (table_class)
+                    f"support sparse_table_class: ['DownpourSparseTable, DownpourSparseSSDTable'], but actual {table_class}"
                 )
             if table_class == "DownpourSparseSSDTable":
                 table_data.table_class = 'SSDSparseTable'
@@ -806,8 +805,7 @@ def set_sparse_table_config(table_data, config):
             )
             if accessor_class not in support_sparse_accessor_class:
                 raise ValueError(
-                    "support sparse_accessor_class: ['DownpourSparseValueAccessor', 'DownpourCtrAccessor', 'DownpourCtrDoubleAccessor', 'DownpourUnitAccessor', 'DownpourDoubleUnitAccessor', 'DownpourCtrDymfAccessor'], but actual %s"
-                    % (accessor_class)
+                    f"support sparse_accessor_class: ['DownpourSparseValueAccessor', 'DownpourCtrAccessor', 'DownpourCtrDoubleAccessor', 'DownpourUnitAccessor', 'DownpourDoubleUnitAccessor', 'DownpourCtrDymfAccessor'], but actual {accessor_class}"
                 )
 
             if accessor_class.find("Double") >= 0:
diff --git a/python/paddle/distributed/fleet/base/graphviz.py b/python/paddle/distributed/fleet/base/graphviz.py
index 5e0b0c186b9b8c..44483483d7c4cf 100644
--- a/python/paddle/distributed/fleet/base/graphviz.py
+++ b/python/paddle/distributed/fleet/base/graphviz.py
@@ -21,7 +21,7 @@
 
 def crepr(v):
     if isinstance(v, str):
-        return '"%s"' % v
+        return f'"{v}"'
     return str(v)
 
 
@@ -255,7 +255,7 @@ def add_op(self, opType, **kwargs):
             highlight = kwargs['highlight']
             del kwargs['highlight']
         return self.graph.node(
-            "<%s>" % opType,
+            f"<{opType}>",
             prefix="op",
             description=opType,
             shape="box",
diff --git a/python/paddle/distributed/fleet/base/strategy_group.py b/python/paddle/distributed/fleet/base/strategy_group.py
index 9e2b5a30ea2876..86870beb917e75 100644
--- a/python/paddle/distributed/fleet/base/strategy_group.py
+++ b/python/paddle/distributed/fleet/base/strategy_group.py
@@ -115,7 +115,7 @@ def __repr__(self):
         if not self.list_of_group:
             return debug_str + "No group."
         for i in range(len(self.list_of_group)):
-            debug_str += f"Group[{i}]: {str(self.list_of_group[i])}; "
+            debug_str += f"Group[{i}]: {self.list_of_group[i]}; "
         return debug_str
 
 
diff --git a/python/paddle/distributed/fleet/base/util_factory.py b/python/paddle/distributed/fleet/base/util_factory.py
index 7eeb9dc027dc33..2a9142fbfcc6b6 100755
--- a/python/paddle/distributed/fleet/base/util_factory.py
+++ b/python/paddle/distributed/fleet/base/util_factory.py
@@ -404,8 +404,7 @@ def _proto_check(self, config):
                 train_prog_var = train_prog.global_block().var(var_name)
             except ValueError as e:
                 print(
-                    "Not find variable '%s' in train program. please check pruning."
-                    % var_name
+                    f"Not find variable '{var_name}' in train program. please check pruning."
                 )
                 is_match = False
                 continue
@@ -669,7 +668,7 @@ def check_not_expected_ops(prog, not_expected_op_types):
                 return_numpy=return_numpy,
             )
             for i, v in enumerate(fetch_list):
-                print("fetch_targets name: %s" % v.name)
+                print(f"fetch_targets name: {v.name}")
                 print(f"fetch_targets: {results[i]}")
             return results
 
diff --git a/python/paddle/distributed/fleet/data_generator/data_generator.py b/python/paddle/distributed/fleet/data_generator/data_generator.py
index 7963128f2c6d9b..15c0d0b26463a8 100644
--- a/python/paddle/distributed/fleet/data_generator/data_generator.py
+++ b/python/paddle/distributed/fleet/data_generator/data_generator.py
@@ -318,10 +318,10 @@ def _gen_str(self, line):
             for item in line:
                 name, elements = item
                 if not isinstance(name, str):
-                    raise ValueError("name%s must be in str type" % type(name))
+                    raise ValueError(f"name{type(name)} must be in str type")
                 if not isinstance(elements, list):
                     raise ValueError(
-                        "elements%s must be in list type" % type(elements)
+                        f"elements{type(elements)} must be in list type"
                     )
                 if not elements:
                     raise ValueError(
@@ -336,8 +336,7 @@ def _gen_str(self, line):
                         self._proto_info[-1] = (name, "float")
                     elif not isinstance(elem, int):
                         raise ValueError(
-                            "the type of element%s must be in int or float"
-                            % type(elem)
+                            f"the type of element{type(elem)} must be in int or float"
                         )
                     output += " " + str(elem)
         else:
@@ -348,10 +347,10 @@ def _gen_str(self, line):
             for index, item in enumerate(line):
                 name, elements = item
                 if not isinstance(name, str):
-                    raise ValueError("name%s must be in str type" % type(name))
+                    raise ValueError(f"name{type(name)} must be in str type")
                 if not isinstance(elements, list):
                     raise ValueError(
-                        "elements%s must be in list type" % type(elements)
+                        f"elements{type(elements)} must be in list type"
                     )
                 if not elements:
                     raise ValueError(
@@ -370,8 +369,7 @@ def _gen_str(self, line):
                         self._proto_info[index] = (name, "float")
                     elif not isinstance(elem, int):
                         raise ValueError(
-                            "the type of element%s must be in int or float"
-                            % type(elem)
+                            f"the type of element{type(elem)} must be in int or float"
                         )
                     output += " " + str(elem)
         return output + "\n"
diff --git a/python/paddle/distributed/fleet/dataset/dataset.py b/python/paddle/distributed/fleet/dataset/dataset.py
index 3c2748fc78c4a1..e6c67dcebd5c9d 100755
--- a/python/paddle/distributed/fleet/dataset/dataset.py
+++ b/python/paddle/distributed/fleet/dataset/dataset.py
@@ -315,8 +315,7 @@ def _check_use_var_with_data_generator(
         for i, ele in enumerate(user_parsed_line):
             if len(ele[1]) == 0:
                 raise ValueError(
-                    "var length error: var %s's length in data_generator is 0"
-                    % ele[0]
+                    f"var length error: var {ele[0]}'s length in data_generator is 0"
                 )
 
             if var_list[i].dtype == paddle.float32 and not all(
diff --git a/python/paddle/distributed/fleet/fleet.py b/python/paddle/distributed/fleet/fleet.py
index 39a72aa1182f1a..c391d3556bbd7b 100755
--- a/python/paddle/distributed/fleet/fleet.py
+++ b/python/paddle/distributed/fleet/fleet.py
@@ -83,8 +83,7 @@ def __impl__(*args, **kwargs):
             and cls._role_maker._is_non_distributed() is True
         ):
             logger.warning(
-                "%s() function doesn't work when use non_distributed fleet."
-                % (func.__name__)
+                f"{func.__name__}() function doesn't work when use non_distributed fleet."
             )
             return
 
diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py
index 31e117a8ef5b2c..47ae5dc7d2b66b 100755
--- a/python/paddle/distributed/fleet/launch_utils.py
+++ b/python/paddle/distributed/fleet/launch_utils.py
@@ -291,7 +291,7 @@ def get_cluster(
                     trainer.accelerators.extend(devices_per_proc[i])
                 else:
                     trainer.accelerators.append(devices_per_proc[i])
-            trainer.endpoint = "%s" % (cur_node_endpoints[i])
+            trainer.endpoint = f"{cur_node_endpoints[i]}"
            trainer.rank = trainer_rank
            trainer_rank += 1
 
@@ -498,7 +498,7 @@ def start_local_trainers(
     for idx, t in enumerate(pod.trainers):
         proc_env = {
             "PADDLE_TRAINER_ID": "%d" % t.rank,
-            "PADDLE_CURRENT_ENDPOINT": "%s" % t.endpoint,
+            "PADDLE_CURRENT_ENDPOINT": f"{t.endpoint}",
             "PADDLE_TRAINERS_NUM": "%d" % cluster.trainers_nranks(),
             "PADDLE_TRAINER_ENDPOINTS": ",".join(cluster.trainers_endpoints()),
             "PADDLE_RANK_IN_NODE": str(idx),
@@ -523,18 +523,18 @@ def start_local_trainers(
         ]
 
         if len(t.accelerators) > 0 and pod.device_mode == DeviceMode.GPU:
-            proc_env["FLAGS_selected_gpus"] = "%s" % ",".join(
-                [str(g) for g in t.accelerators]
+            proc_env["FLAGS_selected_gpus"] = "{}".format(
+                ",".join([str(g) for g in t.accelerators])
             )
 
         if len(t.accelerators) > 0:
-            proc_env["FLAGS_selected_accelerators"] = "%s" % ",".join(
-                [str(g) for g in t.accelerators]
+            proc_env["FLAGS_selected_accelerators"] = "{}".format(
+                ",".join([str(g) for g in t.accelerators])
             )
         # to do: same code style in future
         if framework.core.is_compiled_with_xpu() and len(t.accelerators) > 0:
-            proc_env["FLAGS_selected_xpus"] = "%s" % ",".join(
-                [str(g) for g in t.accelerators]
+            proc_env["FLAGS_selected_xpus"] = "{}".format(
+                ",".join([str(g) for g in t.accelerators])
             )
 
         current_env.update(proc_env)
@@ -571,9 +571,9 @@ def start_local_trainers(
     pre_fn = None if os.name == 'nt' else os.setsid
     if log_dir is not None:
         os.makedirs(log_dir, exist_ok=True)
-        if os.path.exists("%s/endpoints.log" % log_dir):
+        if os.path.exists(f"{log_dir}/endpoints.log"):
            os.remove(f"{log_dir}/endpoints.log")
-        with open("%s/endpoints.log" % log_dir, "w") as f:
+        with open(f"{log_dir}/endpoints.log", "w") as f:
             f.write("PADDLE_TRAINER_ENDPOINTS: \n")
             f.write("\n".join(cluster.trainers_endpoints()))
     if (
@@ -613,8 +613,7 @@ def pull_worker_log(tp):
             except UnicodeEncodeError:
                 sys.stdout.write(
                     'UnicodeEncodeError occurs at this line. '
-                    'Please refer to the original log file "%s"\n'
-                    % tp.log_fn.name
+                    f'Please refer to the original log file "{tp.log_fn.name}"\n'
                 )
         tp.log_offset = fin.tell()
 
@@ -883,7 +882,7 @@ def get_mapped_cluster_without_rank_mapping(
         assert len(ranks_per_node) == 1
         for i in range(len(ranks_per_node)):
             trainer = Trainer()
-            trainer.endpoint = "%s" % (cur_node_endpoints[i])
+            trainer.endpoint = f"{cur_node_endpoints[i]}"
             trainer.rank = ranks_per_node[i]
             pod.trainers.append(trainer)
         cluster.pods.append(pod)
@@ -1002,7 +1001,7 @@ def get_relative_gpu_id(gpu_id):
             trainer.accelerators.append(
                 get_relative_gpu_id(local_device_ids[0])
             )
-            trainer.endpoint = "%s" % (cur_node_endpoints[i])
+            trainer.endpoint = f"{cur_node_endpoints[i]}"
             trainer.rank = ranks_per_node[i]
             pod.trainers.append(trainer)
         cluster.pods.append(pod)
@@ -1936,7 +1935,7 @@ def check_backend(backend):
             "paddle.distributed initialize error, "
             "backend argument can only be one of "
             "'nccl', 'gloo', 'bkcl', 'auto', 'heter', 'xccl' "
-            "but got %s" % backend
+            f"but got {backend}"
         )
 
     if backend == 'nccl' and not framework.core.is_compiled_with_cuda():
diff --git a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py
index cee43657c75275..e2f6503e356599 100644
--- a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py
@@ -82,10 +82,9 @@ def __init__(
                 raise TypeError(
                     "The type of grad_clip should be 'ClipGradByNorm', because DGCMomentumOptimizer only support ClipGradByNorm"
                 )
-        assert isinstance(num_trainers, int), (
-            "The type of num_trainers should be 'int', but received %s"
-            % type(num_trainers)
-        )
+        assert isinstance(
+            num_trainers, int
+        ), f"The type of num_trainers should be 'int', but received {type(num_trainers)}"
         assert (
             num_trainers > 0
         ), "The value of num_trainers should be greater than 0!"
diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py
index 3ce159385fa5a4..a11cb12be74e9a 100644
--- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py
@@ -288,8 +288,7 @@ def get_sys_free_mem():
                 return free
             else:
                 raise ValueError(
-                    "%s platform is unsupported is parameter server optimizer"
-                    % (platform.system())
+                    f"{platform.system()} platform is unsupported is parameter server optimizer"
                 )
 
         if not isinstance(self.inner_opt, paddle.optimizer.SGD):
@@ -334,8 +333,7 @@ def get_sys_free_mem():
                 if x < 0:
                     if neg_dim_count >= 1:
                         raise ValueError(
-                            "Var %s has more than one negative dim."
-                            % (var_name)
+                            f"Var {var_name} has more than one negative dim."
                         )
                     neg_dim_count += 1
                     data_count *= -x
diff --git a/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py
index 9ef2471708e286..7475af50b44414 100755
--- a/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/ps_optimizer.py
@@ -208,8 +208,7 @@ def get_sys_free_mem():
                 return free
             else:
                 raise ValueError(
-                    "%s platform is unsupported is parameter server optimizer"
-                    % (platform.system())
+                    f"{platform.system()} platform is unsupported is parameter server optimizer"
                 )
 
         if not isinstance(self.inner_opt, paddle.optimizer.SGD):
@@ -248,8 +247,7 @@ def get_sys_free_mem():
                 if x < 0:
                     if neg_dim_count >= 1:
                         raise ValueError(
-                            "Var %s has more than one negative dim."
-                            % (var_name)
+                            f"Var {var_name} has more than one negative dim."
                         )
                     neg_dim_count += 1
                     data_count *= -x
diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
index dfdeef1a341c02..c0a51d0d4e04b6 100755
--- a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
@@ -132,7 +132,7 @@ def _get_sharding_segment_strategy(self):
             self._forward_remain_anchors = []
         else:
             raise NotImplementedError(
-                f"the sharding segment strategy [{str(segment_strategy)}] is not implemented"
+                f"the sharding segment strategy [{segment_strategy}] is not implemented"
             )
 
         self._sharding_segment_strategy = segment_strategy
diff --git a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py
index ff7730c7729047..40ca84cccb2686 100755
--- a/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py
+++ b/python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py
@@ -631,7 +631,7 @@ def _print_segmentation_for_debug(self):
             )
 
             for index, layer in enumerate(self._layers_desc[start:end]):
-                logger.info(f"{index + start}: {str(layer)}")
+                logger.info(f"{index + start}: {layer}")
 
         if self._num_virtual_pipeline_stages > 1:
             for stage in range(self._num_stages):
diff --git a/python/paddle/distributed/fleet/meta_parallel/pp_utils/four_directions_p2p_communication.py b/python/paddle/distributed/fleet/meta_parallel/pp_utils/four_directions_p2p_communication.py
index 84473ef7ddbbba..bd50e80d7d965b 100644
--- a/python/paddle/distributed/fleet/meta_parallel/pp_utils/four_directions_p2p_communication.py
+++ b/python/paddle/distributed/fleet/meta_parallel/pp_utils/four_directions_p2p_communication.py
@@ -67,8 +67,8 @@ def initialize_p2p_groups(
     ) = _hcg.get_p2p_groups()
 
     debug_str = (
-        f"P2pInfo: send_next_group: {repr(send_next_group)}, send_prev_group: {repr(send_prev_group)}, "
-        f"recv_next_group: {repr(recv_next_group)}, recv_prev_group: {repr(recv_prev_group)}"
+        f"P2pInfo: send_next_group: {send_next_group!r}, send_prev_group: {send_prev_group!r}, "
+        f"recv_next_group: {recv_next_group!r}, recv_prev_group: {recv_prev_group!r}"
     )
     logger.info(debug_str)
 
diff --git a/python/paddle/distributed/fleet/runtime/the_one_ps.py b/python/paddle/distributed/fleet/runtime/the_one_ps.py
index 38829816877152..0f9137f02cbf9d 100644
--- a/python/paddle/distributed/fleet/runtime/the_one_ps.py
+++ b/python/paddle/distributed/fleet/runtime/the_one_ps.py
@@ -472,11 +472,11 @@ def __init__(self):
     def to_string(self, indent):
         program_str = "{}tensor {{{}\n{}}}"
         attrs = ""
-        attrs += f"feed_var_name: \"{str(self.feed_var_name)}\" "
-        attrs += f"fetch_var_name: \"{str(self.fetch_var_name)}\" "
-        attrs += f"startup_program_id: {str(self.startup_program_id)} "
-        attrs += f"main_program_id: {str(self.main_program_id)} "
-        attrs += f"tensor_table_class: \"{str(self.tensor_table_class)}\" "
+        attrs += f"feed_var_name: \"{self.feed_var_name}\" "
+        attrs += f"fetch_var_name: \"{self.fetch_var_name}\" "
+        attrs += f"startup_program_id: {self.startup_program_id} "
+        attrs += f"main_program_id: {self.main_program_id} "
+        attrs += f"tensor_table_class: \"{self.tensor_table_class}\" "
         attrs += "\n"
         return program_str.format(
             conv_indent(indent), attrs, conv_indent(indent)
diff --git a/python/paddle/distributed/fleet/utils/fs.py b/python/paddle/distributed/fleet/utils/fs.py
index 681fcdb67eb0ec..15018824286d17 100644
--- a/python/paddle/distributed/fleet/utils/fs.py
+++ b/python/paddle/distributed/fleet/utils/fs.py
@@ -481,7 +481,7 @@ def __init__(
         sleep_inter=1000,
     ):  # ms
         self.pre_commands = []
-        hadoop_bin = '%s/bin/hadoop' % hadoop_home
+        hadoop_bin = f'{hadoop_home}/bin/hadoop'
         self.pre_commands.append(hadoop_bin)
         dfs = 'fs'
         self.pre_commands.append(dfs)
@@ -1216,7 +1216,7 @@ def list_files_info(self, path_list):
         )
         ret, lines = self._run_cmd(cmd)
         if len(lines) == 0:
-            logger.warning("list_files empty, path[%s]" % path_list)
+            logger.warning(f"list_files empty, path[{path_list}]")
             return []
         for line in lines:
             arr = line.split(' ')
diff --git a/python/paddle/distributed/fleet/utils/log_util.py b/python/paddle/distributed/fleet/utils/log_util.py
index 9f0a6d1f77f691..a8c5c525bb3a3b 100644
--- a/python/paddle/distributed/fleet/utils/log_util.py
+++ b/python/paddle/distributed/fleet/utils/log_util.py
@@ -67,8 +67,6 @@ def layer_to_str(base, *args, **kwargs):
         if kwargs:
             name += ", "
     if kwargs:
-        name += ", ".join(
-            f"{key}={str(value)}" for key, value in kwargs.items()
-        )
+        name += ", ".join(f"{key}={value}" for key, value in kwargs.items())
     name += ")"
     return name
diff --git a/python/paddle/distributed/fleet/utils/pp_parallel_adaptor.py b/python/paddle/distributed/fleet/utils/pp_parallel_adaptor.py
index e3970ce9364014..0b67a3c8845002 100644
--- a/python/paddle/distributed/fleet/utils/pp_parallel_adaptor.py
+++ b/python/paddle/distributed/fleet/utils/pp_parallel_adaptor.py
@@ -109,7 +109,7 @@ def apply(self, src_model_path: str, dst_model_path: str):
         # first rank extract shared layer
         with_shared = True
         for dir in src_dirs:
-            print("extract layer params in dir %s" % dir)
+            print(f"extract layer params in dir {dir}")
             layers.extend(self.extract_layers(dir, with_shared))
             with_shared = False
         # 2、sort and unique layers
@@ -240,7 +240,7 @@ def priority(elem):
             return float(match.group(1).lstrip("."))
 
         # strictly sort layers
-        print("before sort %s" % ("|".join([e[0] for e in layers])))
+        print("before sort {}".format("|".join([e[0] for e in layers])))
         layers.sort(key=priority)
         # unique
         unique_layers = []
@@ -248,7 +248,7 @@ def priority(elem):
             if unique_layers and e[0] == unique_layers[-1][0]:
                 continue
             unique_layers.append(e)
-        print("after sort %s " % ("|".join([e[0] for e in unique_layers])))
+        print("after sort {} ".format("|".join([e[0] for e in unique_layers])))
         return unique_layers
 
     def segment_layers(
@@ -358,7 +358,7 @@ def merge(src, dst, map_k=None):
         lr_scheduler = None
 
         for layer_names, file_path in layers_segment:
-            print("load %s" % file_path)
+            print(f"load {file_path}")
             layer = paddle.load(file_path)
 
             def get_param_name_mapper(layer_name):