From 70841984211bfdeaaa51fba41d642c153d6b695a Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Fri, 21 Jul 2023 09:46:19 +0000 Subject: [PATCH 01/12] pcqm4m changes --- expts/configs/config_mpnn_10M_pcqm4m.yaml | 15 +++++++-------- graphium/data/datamodule.py | 9 ++++++--- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/expts/configs/config_mpnn_10M_pcqm4m.yaml b/expts/configs/config_mpnn_10M_pcqm4m.yaml index 63fab1970..2d648fd1f 100644 --- a/expts/configs/config_mpnn_10M_pcqm4m.yaml +++ b/expts/configs/config_mpnn_10M_pcqm4m.yaml @@ -56,23 +56,21 @@ datamodule: homolumo: df: null task_level: "graph" - df_path: graphium/data/PCQM4M/pcqm4mv2-20k.csv + df_path: graphium/data/PCQM4M/pcqm4mv2.csv # wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly smiles_col: "cxsmiles" label_cols: ["homo_lumo_gap"] - sample_size: 8000 # use sample_size for test - # splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` - # split_names: ["train", "valid", "test-dev"] + # sample_size: 8000 # use sample_size for test + splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` + split_names: ["train", "valid", "test-dev"] # graphium/data/PCQM4Mv2/split_dict.pt # graphium/data/PCQM4Mv2/pcqm4m_split.csv - split_val: 0.1 - split_test: 0.1 + # split_val: 0.1 + # split_test: 0.1 seed: *seed label_normalization: method: "normal" - min_clipping: 0 - max_clipping: 50 # Featurization prepare_dict_or_graph: pyg:graph @@ -261,6 +259,7 @@ metrics: multitask_handling: mean-per-label trainer: + seed: *seed logger: save_dir: logs/PCQMv2 name: *name diff --git a/graphium/data/datamodule.py b/graphium/data/datamodule.py index d4c0966ed..79ab1abc9 100644 --- a/graphium/data/datamodule.py +++ b/graphium/data/datamodule.py @@ -1906,9 +1906,12 @@ def _get_split_indices( f"file type `{file_type}` for `{splits_path}` not recognised, please use .pt, .csv or .tsv" ) train, val, test = split_names - train_indices = np.asarray(splits[train].dropna()).astype("int").tolist() - val_indices = np.asarray(splits[val].dropna()).astype("int").tolist() - test_indices = np.asarray(splits[test].dropna()).astype("int").tolist() + train_indices = np.asarray(splits[train]).astype("int") + train_indices = train_indices[~np.isnan(train_indices)].tolist() + val_indices = np.asarray(splits[val]).astype("int") + val_indices = val_indices[~np.isnan(val_indices)].tolist() + test_indices = np.asarray(splits[test]).astype("int") + test_indices = test_indices[~np.isnan(test_indices)].tolist() # Filter train, val and test indices _, train_idx, _ = np.intersect1d(sample_idx, train_indices, return_indices=True) From 4a9cde583f40eddb9608ff372bcbce72aaac0a36 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Tue, 25 Jul 2023 14:07:57 +0000 Subject: [PATCH 02/12] mpnn and gps++ changes --- expts/configs/config_gpspp_10M_pcqm4m.yaml | 108 +++++++++++++----- expts/configs/config_mpnn_10M_pcqm4m.yaml | 19 ++- .../neurips2023_configs/config_large_gcn.yaml | 1 + 3 files changed, 97 insertions(+), 31 deletions(-) diff --git a/expts/configs/config_gpspp_10M_pcqm4m.yaml b/expts/configs/config_gpspp_10M_pcqm4m.yaml index c6862cb06..649b9e045 100644 --- a/expts/configs/config_gpspp_10M_pcqm4m.yaml 
+++ b/expts/configs/config_gpspp_10M_pcqm4m.yaml @@ -3,8 +3,59 @@ constants: name: &name pcqm4mv2_gpspp_4layer seed: &seed 42 raise_train_error: true # Whether the code should raise an error if it crashes during training - accelerator: - type: ipu # cpu or ipu or gpu + entity: multitask-gnn + +accelerator: + type: ipu # cpu or ipu or gpu + config_override: + datamodule: + args: + ipu_dataloader_training_opts: + mode: async + max_num_nodes_per_graph: 20 # train max nodes: 20, max_edges: 54 + max_num_edges_per_graph: 60 + ipu_dataloader_inference_opts: + mode: async + max_num_nodes_per_graph: 16 # valid max nodes: 51, max_edges: 118 + max_num_edges_per_graph: 120 + # Data handling-related + batch_size_training: 16 + batch_size_inference: 16 + predictor: + optim_kwargs: + loss_scaling: 1024 + trainer: + trainer: + precision: 16-true + accumulate_grad_batches: 4 + + ipu_config: + - deviceIterations(2) # IPU would require large batches to be ready for the model. + - replicationFactor(4) + # - enableProfiling("graph_analyser") # The folder where the profile will be stored + # - enableExecutableCaching("pop_compiler_cache") + - TensorLocations.numIOTiles(128) + - _Popart.set("defaultBufferingDepth", 128) + - Precision.enableStochasticRounding(True) + + ipu_inference_config: + # set device iteration and replication factor to 1 during inference + # gradient accumulation was set to 1 in the code + - deviceIterations(1) + - replicationFactor(1) + - Precision.enableStochasticRounding(False) + +# accelerator: +# type: cpu # cpu or ipu or gpu +# config_override: +# args: +# datamodule: +# batch_size_training: 256 +# batch_size_inference: 64 +# trainer: +# trainer: +# precision: 32 +# accumulate_grad_batches: 1 datamodule: module_type: "MultitaskFromSmilesDataModule" @@ -14,20 +65,20 @@ datamodule: homolumo: df: null task_level: "graph" - df_path: graphium/data/PCQM4Mv2/pcqm4mv2.csv #graphium/data/PCQM4Mv2/pcqm4mv2.csv + df_path: graphium/data/PCQM4M/pcqm4mv2.csv #graphium/data/PCQM4Mv2/pcqm4mv2.csv # wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly smiles_col: "cxsmiles" label_cols: ["homo_lumo_gap"] - # sample_size: 80000 # use sample_size for test - splits_path: graphium/data/PCQM4Mv2/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` + sample_size: 8000 # use sample_size for test + splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` # graphium/data/PCQM4Mv2/split_dict.pt # graphium/data/PCQM4Mv2/pcqm4m_split.csv split_names: ["train", "valid", "test-dev"] label_normalization: method: "normal" - min_clipping: 0 - max_clipping: 50 + min_clipping: [0] + max_clipping: [50] # Featurization prepare_dict_or_graph: pyg:graph @@ -49,36 +100,29 @@ datamodule: use_bonds_weights: False pos_encoding_as_features: # encoder dropout 0.18 pos_types: - la_pos: &pos_enc - pos_type: laplacian_eigvec_eigval #laplacian_eigvec + lap_eigvec: + pos_level: node + pos_type: laplacian_eigvec + num_pos: 8 + normalization: "none" # nomrlization already applied on the eigen vectors + disconnected_comp: True # if eigen values/vector for disconnected graph are included + lap_eigval: + pos_level: node + pos_type: laplacian_eigval num_pos: 8 normalization: "none" # nomrlization already applied on the 
eigen vectors disconnected_comp: True # if eigen values/vector for disconnected graph are included rw_pos: # use same name as pe_encoder - pos_type: rwse + pos_level: node + pos_type: rw_return_probs ksteps: 16 - # Data handling-related - batch_size_training: 16 - batch_size_inference: 16 # cache_data_path: . num_workers: 20 # -1 to use all persistent_workers: False # if use persistent worker at the start of each epoch. # Using persistent_workers false might make the start of each epoch very long. - featurization_backend: "loky" - ipu_dataloader_training_opts: - mode: async - max_num_nodes_per_graph: 20 # train max nodes: 20, max_edges: 54 - max_num_edges_per_graph: 60 - - ipu_dataloader_inference_opts: - mode: async - max_num_nodes_per_graph: 16 # valid max nodes: 51, max_edges: 118 - max_num_edges_per_graph: 120 - # test-dev max nodes: 50, max_edges: 116 - # test-challenge max nodes: 51, max_edges: 106 architecture: model_type: FullGraphMultiTaskNetwork @@ -173,10 +217,22 @@ architecture: droppath_rate_ffn: 0.0 - post_nn: null + graph_output_nn: + graph: + pooling: [sum] + out_dim: 256 + hidden_dims: 256 + depth: 1 + activation: relu + last_activation: none + dropout: *dropout + normalization: *normalization + last_normalization: "none" + residual_type: none task_heads: homolumo: + task_level: graph out_dim: 1 hidden_dims: 256 depth: 2 # Not needed if we have hidden_dims diff --git a/expts/configs/config_mpnn_10M_pcqm4m.yaml b/expts/configs/config_mpnn_10M_pcqm4m.yaml index 2d648fd1f..ffb294cb3 100644 --- a/expts/configs/config_mpnn_10M_pcqm4m.yaml +++ b/expts/configs/config_mpnn_10M_pcqm4m.yaml @@ -3,6 +3,7 @@ constants: name: &name pcqm4mv2_mpnn_4layer seed: &seed 42 raise_train_error: true # Whether the code should raise an error if it crashes during training + entity: multitask-gnn accelerator: type: ipu # cpu or ipu or gpu @@ -37,12 +38,20 @@ accelerator: - _Popart.set("defaultBufferingDepth", 128) - Precision.enableStochasticRounding(True) + ipu_inference_config: + # set device iteration and replication factor to 1 during inference + # gradient accumulation was set to 1 in the code + - deviceIterations(1) + - replicationFactor(1) + - Precision.enableStochasticRounding(False) + # accelerator: # type: cpu # cpu or ipu or gpu # config_override: -# datamodule: -# batch_size_training: 256 -# batch_size_inference: 64 +# args: +# datamodule: +# batch_size_training: 256 +# batch_size_inference: 64 # trainer: # trainer: # precision: 32 @@ -71,6 +80,8 @@ datamodule: seed: *seed label_normalization: method: "normal" + min_clipping: [0] + max_clipping: [50] # Featurization prepare_dict_or_graph: pyg:graph @@ -112,8 +123,6 @@ datamodule: num_workers: 30 # -1 to use all persistent_workers: False # if use persistent worker at the start of each epoch. # Using persistent_workers false might make the start of each epoch very long. 
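
Note on the split handling above: PATCH 01 reworks `_get_split_indices` in graphium/data/datamodule.py so that the NaN padding found in the train/valid/test-dev split columns is filtered with numpy rather than pandas `.dropna()`, before the indices are intersected with `sample_idx`. A minimal sketch of the same idea (a hypothetical helper, not the code in the patch):

    import numpy as np

    def indices_from_split_column(column) -> list:
        # Split files can pad the shorter columns with NaN; drop the padding,
        # then cast the remaining entries to integer row indices.
        arr = np.asarray(column, dtype=float)
        return arr[~np.isnan(arr)].astype(int).tolist()
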
- featurization_backend: "loky" - architecture: model_type: FullGraphMultiTaskNetwork diff --git a/expts/neurips2023_configs/config_large_gcn.yaml b/expts/neurips2023_configs/config_large_gcn.yaml index 7745693b3..c972f855a 100644 --- a/expts/neurips2023_configs/config_large_gcn.yaml +++ b/expts/neurips2023_configs/config_large_gcn.yaml @@ -3,6 +3,7 @@ constants: name: &name neurips2023_large_data_gcn seed: &seed 42 raise_train_error: true # Whether the code should raise an error if it crashes during training + entity: multitask-gnn accelerator: type: ipu # cpu or ipu or gpu From 5bcb69c2690c1ff2d4461678fc64e0bd1828f2da Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Tue, 25 Jul 2023 15:05:07 +0000 Subject: [PATCH 03/12] gpspp tested ok --- expts/configs/config_gpspp_10M_pcqm4m.yaml | 51 ++++++++++++++++------ expts/configs/config_mpnn_10M_pcqm4m.yaml | 6 +-- 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/expts/configs/config_gpspp_10M_pcqm4m.yaml b/expts/configs/config_gpspp_10M_pcqm4m.yaml index 649b9e045..dad4ac7b2 100644 --- a/expts/configs/config_gpspp_10M_pcqm4m.yaml +++ b/expts/configs/config_gpspp_10M_pcqm4m.yaml @@ -30,8 +30,8 @@ accelerator: accumulate_grad_batches: 4 ipu_config: - - deviceIterations(2) # IPU would require large batches to be ready for the model. - - replicationFactor(4) + - deviceIterations(20) # IPU would require large batches to be ready for the model. + - replicationFactor(16) # - enableProfiling("graph_analyser") # The folder where the profile will be stored # - enableExecutableCaching("pop_compiler_cache") - TensorLocations.numIOTiles(128) @@ -70,11 +70,12 @@ datamodule: # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly smiles_col: "cxsmiles" label_cols: ["homo_lumo_gap"] - sample_size: 8000 # use sample_size for test + # sample_size: 8000 # use sample_size for test splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` # graphium/data/PCQM4Mv2/split_dict.pt # graphium/data/PCQM4Mv2/pcqm4m_split.csv split_names: ["train", "valid", "test-dev"] + seed: *seed label_normalization: method: "normal" min_clipping: [0] @@ -82,10 +83,10 @@ datamodule: # Featurization prepare_dict_or_graph: pyg:graph - featurization_n_jobs: 20 + featurization_n_jobs: 30 featurization_progress: True featurization_backend: "loky" - processed_graph_data_path: "/tmp/graphium_data/PCQM4Mv2/" + processed_graph_data_path: "../datacache/PCQM4Mv2/" featurization: # OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence), # 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring', @@ -119,7 +120,7 @@ datamodule: # cache_data_path: . - num_workers: 20 # -1 to use all + num_workers: 30 # -1 to use all persistent_workers: False # if use persistent worker at the start of each epoch. # Using persistent_workers false might make the start of each epoch very long. @@ -156,29 +157,29 @@ architecture: encoders: #la_pos | rw_pos la_pos: # Set as null to avoid a pre-nn network encoder_type: "laplacian_pe" - input_keys: ["eigvecs", "eigvals"] + input_keys: ["laplacian_eigvec", "laplacian_eigval"] output_keys: ["feat"] hidden_dim: 64 out_dim: 32 model_type: 'DeepSet' #'Transformer' or 'DeepSet' num_layers: 2 num_layers_post: 1 # Num. 
layers to apply after pooling - dropout: 0.18 + dropout: 0.1 first_normalization: "none" #"batch_norm" or "layer_norm" rw_pos: encoder_type: "mlp" - input_keys: ["rwse"] + input_keys: ["rw_return_probs"] output_keys: ["feat"] hidden_dim: 64 out_dim: 32 num_layers: 2 - dropout: 0.18 + dropout: 0.1 normalization: "layer_norm" #"batch_norm" or "layer_norm" first_normalization: "layer_norm" #"batch_norm" or "layer_norm" gaussian_pos: # 3D_bias encoder_type: "gaussian_kernel" input_keys: ["positions_3d"] - output_keys: ["feat", "graph_gaussian_bias_3d"] + output_keys: ["feat", "nodepair_gaussian_bias_3d"] num_heads: 32 num_layers: 1 #2 embed_dim: 32 @@ -197,7 +198,6 @@ architecture: normalization: "layer_norm" last_normalization: *normalization residual_type: simple - pooling: [sum] virtual_node: 'none' layer_type: 'pyg:gps' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps layer_kwargs: # Parameters for the model itself. You could define dropout_attn: 0.1 @@ -210,7 +210,7 @@ architecture: out_dim_edges: 128 attn_type: "full-attention" # "full-attention", "none" precision: &precision 16 - biased_attention_key: "graph_gaussian_bias_3d" # 3D_bias + biased_attention_key: "nodepair_gaussian_bias_3d" # 3D_bias attn_kwargs: num_heads: 32 droppath_rate_attn: 0.0 @@ -274,10 +274,33 @@ metrics: - name: mae metric: mae_ipu target_nan_mask: null - multitask_handling: flatten + multitask_handling: mean-per-label threshold_kwargs: null - name: pearsonr metric: pearsonr_ipu threshold_kwargs: null target_nan_mask: null multitask_handling: mean-per-label + +trainer: + seed: *seed + logger: + save_dir: logs/PCQMv2 + name: *name + project: PCQMv2_gpspp + #early_stopping: + # monitor: *monitor + # min_delta: 0 + # patience: 10 + # mode: &mode min + model_checkpoint: + dirpath: models_checkpoints/PCMQ4Mv2/ + filename: *name + #monitor: *monitor + #mode: *mode + save_top_k: 1 + every_n_epochs: 100 + trainer: + max_epochs: *max_epochs + min_epochs: 1 + check_val_every_n_epoch: 20 \ No newline at end of file diff --git a/expts/configs/config_mpnn_10M_pcqm4m.yaml b/expts/configs/config_mpnn_10M_pcqm4m.yaml index ffb294cb3..cbe1ded53 100644 --- a/expts/configs/config_mpnn_10M_pcqm4m.yaml +++ b/expts/configs/config_mpnn_10M_pcqm4m.yaml @@ -1,4 +1,4 @@ -# Testing the mpnn only model with the PCQMv2 dataset on IPU. +# MPNN model with the PCQMv2 dataset on IPU. 
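
For reference, the IPU options and dataloader options in this patch combine multiplicatively: under PopTorch, the combined batch prepared per host-side step is micro-batch x device iterations x replication factor x gradient accumulation. A small illustrative calculation with the GPS++ values at this point in the series (treating Lightning's `accumulate_grad_batches` as the PopTorch gradient-accumulation factor, which is an assumption about how graphium wires the two together):

    micro_batch = 16          # batch_size_training
    device_iterations = 20    # deviceIterations(20)
    replication = 16          # replicationFactor(16)
    grad_accum = 4            # trainer.accumulate_grad_batches

    combined_batch = micro_batch * device_iterations * replication * grad_accum
    print(combined_batch)     # 20480 graphs fetched per host-side training step

This is why bumping deviceIterations from 2 to 20 and replicationFactor from 4 to 16 matters for throughput without touching the per-replica micro-batch.
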
constants: name: &name pcqm4mv2_mpnn_4layer seed: &seed 42 @@ -85,7 +85,7 @@ datamodule: # Featurization prepare_dict_or_graph: pyg:graph - featurization_n_jobs: 20 + featurization_n_jobs: 30 featurization_progress: True featurization_backend: "loky" processed_graph_data_path: "../datacache/PCQM4Mv2/" @@ -259,7 +259,7 @@ metrics: - name: mae metric: mae_ipu target_nan_mask: null - multitask_handling: flatten + multitask_handling: mean-per-label threshold_kwargs: null - name: pearsonr metric: pearsonr_ipu From 5f69e792bd0d636216c48aa1ba586534d8204c95 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Tue, 25 Jul 2023 15:07:36 +0000 Subject: [PATCH 04/12] minot update for gcn large --- expts/neurips2023_configs/config_large_gcn.yaml | 1 - expts/neurips2023_configs/debug/config_large_gcn_debug.yaml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/expts/neurips2023_configs/config_large_gcn.yaml b/expts/neurips2023_configs/config_large_gcn.yaml index c972f855a..033b8a5f5 100644 --- a/expts/neurips2023_configs/config_large_gcn.yaml +++ b/expts/neurips2023_configs/config_large_gcn.yaml @@ -171,7 +171,6 @@ datamodule: num_workers: 32 # -1 to use all persistent_workers: True # if use persistent worker at the start of each epoch. # Using persistent_workers false might make the start of each epoch very long. - featurization_backend: "loky" architecture: diff --git a/expts/neurips2023_configs/debug/config_large_gcn_debug.yaml b/expts/neurips2023_configs/debug/config_large_gcn_debug.yaml index db18ee10c..d6e4cb724 100644 --- a/expts/neurips2023_configs/debug/config_large_gcn_debug.yaml +++ b/expts/neurips2023_configs/debug/config_large_gcn_debug.yaml @@ -3,6 +3,7 @@ constants: name: &name neurips2023_large_data_gcn_debug seed: &seed 100 raise_train_error: true # Whether the code should raise an error if it crashes during training + entity: multitask-gnn accelerator: type: ipu # cpu or ipu or gpu @@ -169,7 +170,6 @@ datamodule: num_workers: 30 # -1 to use all persistent_workers: False # if use persistent worker at the start of each epoch. # Using persistent_workers false might make the start of each epoch very long. 
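
On the metric change above (multitask_handling: flatten -> mean-per-label): the usual reading of the two modes is that "flatten" pools every (sample, label) pair into one score, while "mean-per-label" scores each label column separately and averages the results; for the single-label homolumo head the two coincide, so this is a consistency change. A rough torch sketch of the distinction, not graphium's exact implementation:

    import torch

    def mae_flatten(preds: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        # One MAE over all (sample, label) pairs pooled together.
        return (preds - targets).abs().mean()

    def mae_mean_per_label(preds: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        # One MAE per label column, then the average of the per-label scores.
        return (preds - targets).abs().mean(dim=0).mean()
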
- featurization_backend: "loky" architecture: From 043f7ae91f82444c502171bcd736550c3a91c920 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Thu, 27 Jul 2023 08:54:50 +0000 Subject: [PATCH 05/12] add options for throughput optimization --- expts/configs/config_gpspp_10M_pcqm4m.yaml | 11 ++++++----- expts/configs/config_mpnn_10M_pcqm4m.yaml | 9 +++++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/expts/configs/config_gpspp_10M_pcqm4m.yaml b/expts/configs/config_gpspp_10M_pcqm4m.yaml index dad4ac7b2..3bfaf3de2 100644 --- a/expts/configs/config_gpspp_10M_pcqm4m.yaml +++ b/expts/configs/config_gpspp_10M_pcqm4m.yaml @@ -16,12 +16,13 @@ accelerator: max_num_edges_per_graph: 60 ipu_dataloader_inference_opts: mode: async - max_num_nodes_per_graph: 16 # valid max nodes: 51, max_edges: 118 + max_num_nodes_per_graph: 30 # valid max nodes: 51, max_edges: 118 max_num_edges_per_graph: 120 # Data handling-related batch_size_training: 16 batch_size_inference: 16 predictor: + metrics_every_n_train_steps: 1000 optim_kwargs: loss_scaling: 1024 trainer: @@ -78,8 +79,6 @@ datamodule: seed: *seed label_normalization: method: "normal" - min_clipping: [0] - max_clipping: [50] # Featurization prepare_dict_or_graph: pyg:graph @@ -209,7 +208,7 @@ architecture: in_dim_edges: 128 out_dim_edges: 128 attn_type: "full-attention" # "full-attention", "none" - precision: &precision 16 + precision: &precision 16-true biased_attention_key: "nodepair_gaussian_bias_3d" # 3D_bias attn_kwargs: num_heads: 32 @@ -246,7 +245,9 @@ architecture: #Task-specific predictor: metrics_on_progress_bar: - homolumo: ["mae", "pearsonr"] + homolumo: [] + metrics_on_training_set: + homolumo: ["pearsonr"] loss_fun: homolumo: mae_ipu random_seed: *seed diff --git a/expts/configs/config_mpnn_10M_pcqm4m.yaml b/expts/configs/config_mpnn_10M_pcqm4m.yaml index cbe1ded53..85291441e 100644 --- a/expts/configs/config_mpnn_10M_pcqm4m.yaml +++ b/expts/configs/config_mpnn_10M_pcqm4m.yaml @@ -16,12 +16,13 @@ accelerator: max_num_edges_per_graph: 60 ipu_dataloader_inference_opts: mode: async - max_num_nodes_per_graph: 16 # valid max nodes: 51, max_edges: 118 + max_num_nodes_per_graph: 30 # valid max nodes: 51, max_edges: 118 max_num_edges_per_graph: 120 # Data handling-related batch_size_training: 64 batch_size_inference: 16 predictor: + metrics_every_n_train_steps: 1000 optim_kwargs: loss_scaling: 1024 trainer: @@ -80,8 +81,6 @@ datamodule: seed: *seed label_normalization: method: "normal" - min_clipping: [0] - max_clipping: [50] # Featurization prepare_dict_or_graph: pyg:graph @@ -232,7 +231,9 @@ architecture: #Task-specific predictor: metrics_on_progress_bar: - homolumo: ["mae", "pearsonr"] + homolumo: [] + metrics_on_training_set: + homolumo: ["pearsonr"] loss_fun: homolumo: mae_ipu random_seed: *seed From a5f4ebce7c02c81bd9a2a16573957482f6efe676 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Thu, 27 Jul 2023 15:25:15 +0000 Subject: [PATCH 06/12] fix run_validation_test.py after hydra changes --- expts/configs/config_mpnn_10M_pcqm4m.yaml | 2 +- expts/main_run_multitask.py | 1 - expts/run_validation_test.py | 38 +++++++---------------- 3 files changed, 12 insertions(+), 29 deletions(-) diff --git a/expts/configs/config_mpnn_10M_pcqm4m.yaml b/expts/configs/config_mpnn_10M_pcqm4m.yaml index 85291441e..2a4c18b5b 100644 --- a/expts/configs/config_mpnn_10M_pcqm4m.yaml +++ b/expts/configs/config_mpnn_10M_pcqm4m.yaml @@ -12,7 +12,7 @@ accelerator: args: ipu_dataloader_training_opts: mode: async - max_num_nodes_per_graph: 20 # train 
max nodes: 20, max_edges: 54 + max_num_nodes_per_graph: 16 # train max nodes: 20, max_edges: 54 max_num_edges_per_graph: 60 ipu_dataloader_inference_opts: mode: async diff --git a/expts/main_run_multitask.py b/expts/main_run_multitask.py index 64e1c185b..c14670377 100644 --- a/expts/main_run_multitask.py +++ b/expts/main_run_multitask.py @@ -73,7 +73,6 @@ def main(cfg: DictConfig) -> None: save_params_to_wandb(trainer.logger, cfg, predictor, datamodule) # Determine the max num nodes and edges in training and validation - logger.info("About to set the max nodes etc.") predictor.set_max_nodes_edges_per_graph(datamodule, stages=["train", "val"]) # Run the model training diff --git a/expts/run_validation_test.py b/expts/run_validation_test.py index cf6248d24..06804301c 100644 --- a/expts/run_validation_test.py +++ b/expts/run_validation_test.py @@ -4,7 +4,7 @@ from os.path import dirname, abspath import yaml from copy import deepcopy -from omegaconf import DictConfig +from omegaconf import DictConfig, OmegaConf import timeit from loguru import logger from datetime import datetime @@ -20,41 +20,33 @@ load_trainer, save_params_to_wandb, load_accelerator, - load_yaml_config, ) from graphium.utils.safe_run import SafeRun +import hydra # WandB import wandb # Set up the working directory MAIN_DIR = dirname(dirname(abspath(graphium.__file__))) - -# CONFIG_FILE = "expts/configs/config_mpnn_10M_b3lyp.yaml" -# CONFIG_FILE = "expts/configs/config_mpnn_10M_pcqm4m.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_debug.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_large_mpnn.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_large_gcn.yaml" -CONFIG_FILE = "expts/neurips2023_configs/debug/config_large_gcn_debug.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_large_gin.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_large_gine.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_small_gcn.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_large_gcn.yaml" -# CONFIG_FILE = "exptas/neurips2023_configs/config_small_gin.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_small_gine.yaml" os.chdir(MAIN_DIR) -def main(cfg: DictConfig, run_name: str = "main", add_date_time: bool = True) -> None: +@hydra.main(version_base=None, config_path="hydra-configs", config_name="main") +def main(cfg: DictConfig) -> None: + cfg = OmegaConf.to_container(cfg, resolve=True) + + run_name: str = "main" + add_date_time: bool = True + st = timeit.default_timer() date_time_suffix = "" if add_date_time: date_time_suffix = datetime.now().strftime("%d.%m.%Y_%H.%M.%S") - cfg = deepcopy(cfg) - wandb.init(project=cfg["constants"]["name"], config=cfg) + wandb.init(entity=cfg["constants"]["entity"], project=cfg["constants"]["name"], config=cfg) # Initialize the accelerator cfg, accelerator_type = load_accelerator(cfg) @@ -110,12 +102,4 @@ def main(cfg: DictConfig, run_name: str = "main", add_date_time: bool = True) -> if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--config", help="Path to the config file", default=None) - - args, unknown_args = parser.parse_known_args() - if args.config is not None: - CONFIG_FILE = args.config - cfg = load_yaml_config(CONFIG_FILE, MAIN_DIR, unknown_args) - - main(cfg) + main() From 06bfe9fcae55641178b044b06d344a981ead3054 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Thu, 27 Jul 2023 16:04:21 +0000 Subject: [PATCH 07/12] cpnvert pcqm4m mpnn to hydra configs --- expts/hydra-configs/accelerator/ipu.yaml | 4 
+- .../dataset/accelerator/pcqm4m_ipu.yaml | 25 ++ expts/hydra-configs/dataset/pcqm4m.yaml | 221 ++++++++++++++++++ .../hydra-configs/experiment/pcqm4m_mpnn.yaml | 13 ++ expts/hydra-configs/main.yaml | 4 +- expts/hydra-configs/model/mpnn.yaml | 16 ++ 6 files changed, 279 insertions(+), 4 deletions(-) create mode 100644 expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml create mode 100644 expts/hydra-configs/dataset/pcqm4m.yaml create mode 100644 expts/hydra-configs/experiment/pcqm4m_mpnn.yaml create mode 100644 expts/hydra-configs/model/mpnn.yaml diff --git a/expts/hydra-configs/accelerator/ipu.yaml b/expts/hydra-configs/accelerator/ipu.yaml index 6e7fc8e06..b7075af97 100644 --- a/expts/hydra-configs/accelerator/ipu.yaml +++ b/expts/hydra-configs/accelerator/ipu.yaml @@ -1,9 +1,9 @@ type: ipu ipu_config: - - deviceIterations(5) # IPU would require large batches to be ready for the model. + - deviceIterations(30) # IPU would require large batches to be ready for the model. - replicationFactor(16) # - enableProfiling("graph_analyser") # The folder where the profile will be stored # - enableExecutableCaching("pop_compiler_cache") - TensorLocations.numIOTiles(128) - - _Popart.set("defaultBufferingDepth", 128) + - _Popart.set("defaultBufferingDepth", 96) - Precision.enableStochasticRounding(True) \ No newline at end of file diff --git a/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml new file mode 100644 index 000000000..ebe372605 --- /dev/null +++ b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml @@ -0,0 +1,25 @@ +# @package _global_ + +datamodule: + args: + ipu_dataloader_training_opts: + mode: async + max_num_nodes_per_graph: 16 # train max nodes: 20, max_edges: 54 + max_num_edges_per_graph: 60 + ipu_dataloader_inference_opts: + mode: async + max_num_nodes_per_graph: 30 # valid max nodes: 51, max_edges: 118 + max_num_edges_per_graph: 120 + # Data handling-related + batch_size_training: 64 + batch_size_inference: 16 + +predictor: + metrics_every_n_train_steps: 1000 + optim_kwargs: + loss_scaling: 1024 + +trainer: + trainer: + precision: 16-true + accumulate_grad_batches: 4 \ No newline at end of file diff --git a/expts/hydra-configs/dataset/pcqm4m.yaml b/expts/hydra-configs/dataset/pcqm4m.yaml new file mode 100644 index 000000000..46f36e00d --- /dev/null +++ b/expts/hydra-configs/dataset/pcqm4m.yaml @@ -0,0 +1,221 @@ +# @package _global_ + +datamodule: + module_type: "MultitaskFromSmilesDataModule" + # module_type: "FakeDataModule" # Option to use generated data + args: # Matches that in the test_multitask_datamodule.py case. 
+ task_specific_args: # To be replaced by a new class "DatasetParams" + homolumo: + df: null + task_level: "graph" + df_path: graphium/data/PCQM4M/pcqm4mv2.csv + # wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv + # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly + smiles_col: "cxsmiles" + label_cols: ["homo_lumo_gap"] + # sample_size: 8000 # use sample_size for test + splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` + split_names: ["train", "valid", "test-dev"] + # graphium/data/PCQM4Mv2/split_dict.pt + # graphium/data/PCQM4Mv2/pcqm4m_split.csv + # split_val: 0.1 + # split_test: 0.1 + seed: ${constants.seed} + label_normalization: + method: "normal" + + # Featurization + prepare_dict_or_graph: pyg:graph + featurization_n_jobs: 30 + featurization_progress: True + featurization_backend: "loky" + processed_graph_data_path: "../datacache/PCQM4Mv2/" + featurization: + # OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence), + # 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring', + # 'num_chiral_centers (not included yet)'] + atom_property_list_onehot: [atomic-number, group, period, total-valence] + atom_property_list_float: [degree, formal-charge, radical-electron, aromatic, in-ring] + # OGB: ['possible_bond_type', 'possible_bond_stereo', 'possible_is_in_ring'] + edge_property_list: [bond-type-onehot, stereo, in-ring] + add_self_loop: False + explicit_H: False # if H is included + use_bonds_weights: False + pos_encoding_as_features: # encoder dropout 0.18 + pos_types: + lap_eigvec: + pos_level: node + pos_type: laplacian_eigvec + num_pos: 8 + normalization: "none" # nomrlization already applied on the eigen vectors + disconnected_comp: True # if eigen values/vector for disconnected graph are included + lap_eigval: + pos_level: node + pos_type: laplacian_eigval + num_pos: 8 + normalization: "none" # nomrlization already applied on the eigen vectors + disconnected_comp: True # if eigen values/vector for disconnected graph are included + rw_pos: # use same name as pe_encoder + pos_level: node + pos_type: rw_return_probs + ksteps: 16 + + # cache_data_path: . + num_workers: 30 # -1 to use all + persistent_workers: False # if use persistent worker at the start of each epoch. + # Using persistent_workers false might make the start of each epoch very long. 
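
The rw_pos block above requests random-walk return probabilities as a node-level positional encoding. Assuming the standard definition (the diagonal of the k-step random-walk transition matrix, for k = 1..ksteps), a minimal numpy sketch of the quantity the `rw_return_probs` encoder consumes — graphium's own featurizer is the authoritative implementation:

    import numpy as np

    def rw_return_probs(adj: np.ndarray, ksteps: int = 16) -> np.ndarray:
        # Probability that a k-step random walk returns to its starting node.
        deg = adj.sum(axis=1, keepdims=True)
        P = adj / np.clip(deg, 1, None)      # row-stochastic transition matrix
        probs, Pk = [], np.eye(adj.shape[0])
        for _ in range(ksteps):
            Pk = Pk @ P
            probs.append(np.diag(Pk).copy())
        return np.stack(probs, axis=1)       # shape [num_nodes, ksteps]
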
+ +architecture: + model_type: FullGraphMultiTaskNetwork + mup_base_path: null + pre_nn: # Set as null to avoid a pre-nn network + out_dim: 256 + hidden_dims: 1024 + depth: 2 + activation: relu + last_activation: none + dropout: &dropout 0.18 + normalization: &normalization layer_norm + last_normalization: *normalization + residual_type: none + + pre_nn_edges: # Set as null to avoid a pre-nn network + out_dim: 128 + hidden_dims: 512 + depth: 2 + activation: relu + last_activation: none + dropout: *dropout + normalization: *normalization + last_normalization: *normalization + residual_type: none + + pe_encoders: + out_dim: 32 + pool: "sum" #"mean" "max" + last_norm: None #"batch_norm", "layer_norm" + encoders: #la_pos | rw_pos + la_pos: # Set as null to avoid a pre-nn network + encoder_type: "laplacian_pe" + input_keys: ["laplacian_eigvec", "laplacian_eigval"] + output_keys: ["feat"] + hidden_dim: 64 + out_dim: 32 + model_type: 'DeepSet' #'Transformer' or 'DeepSet' + num_layers: 2 + num_layers_post: 1 # Num. layers to apply after pooling + dropout: 0.1 + first_normalization: "none" #"batch_norm" or "layer_norm" + rw_pos: + encoder_type: "mlp" + input_keys: ["rw_return_probs"] + output_keys: ["feat"] + hidden_dim: 64 + out_dim: 32 + num_layers: 2 + dropout: 0.1 + normalization: "layer_norm" #"batch_norm" or "layer_norm" + first_normalization: "layer_norm" #"batch_norm" or "layer_norm" + + + + gnn: # Set as null to avoid a post-nn network + out_dim: 256 + hidden_dims: 256 + depth: 4 + activation: gelu + last_activation: none + dropout: 0.1 + normalization: "layer_norm" + last_normalization: *normalization + residual_type: simple + virtual_node: 'none' + + graph_output_nn: + graph: + pooling: [sum] + out_dim: 256 + hidden_dims: 256 + depth: 1 + activation: relu + last_activation: none + dropout: *dropout + normalization: *normalization + last_normalization: "none" + residual_type: none + + task_heads: + homolumo: + task_level: graph + out_dim: 1 + hidden_dims: 256 + depth: 2 # Not needed if we have hidden_dims + activation: relu + last_activation: none + dropout: *dropout + normalization: *normalization + last_normalization: "none" + residual_type: none + +#Task-specific +predictor: + metrics_on_progress_bar: + homolumo: [] + metrics_on_training_set: + homolumo: ["pearsonr"] + loss_fun: + homolumo: mae_ipu + random_seed: ${constants.seed} + optim_kwargs: + lr: 4.e-4 # warmup can be scheduled using torch_scheduler_kwargs + # weight_decay: 1.e-7 + torch_scheduler_kwargs: + module_type: WarmUpLinearLR + max_num_epochs: &max_epochs 100 + warmup_epochs: 10 + verbose: False + scheduler_kwargs: + # monitor: &monitor homolumo/mae/train + # mode: min + # frequency: 1 + target_nan_mask: null # null: no mask, 0: 0 mask, ignore: ignore nan values from loss + flag_kwargs: + n_steps: 0 # 1 + alpha: 0.0 # 0.01 + +# Task-specific +metrics: + homolumo: + - name: mae + metric: mae_ipu + target_nan_mask: null + multitask_handling: mean-per-label + threshold_kwargs: null + - name: pearsonr + metric: pearsonr_ipu + threshold_kwargs: null + target_nan_mask: null + multitask_handling: mean-per-label + +trainer: + seed: ${constants.seed} + logger: + save_dir: logs/PCQMv2 + name: ${constants.name} + project: PCQMv2_mpnn + #early_stopping: + # monitor: *monitor + # min_delta: 0 + # patience: 10 + # mode: &mode min + model_checkpoint: + dirpath: models_checkpoints/PCMQ4Mv2/ + filename: ${constants.name} + #monitor: *monitor + #mode: *mode + save_top_k: 1 + every_n_epochs: 100 + trainer: + max_epochs: *max_epochs + 
min_epochs: 1 + check_val_every_n_epoch: 20 \ No newline at end of file diff --git a/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml new file mode 100644 index 000000000..037f5e967 --- /dev/null +++ b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml @@ -0,0 +1,13 @@ +# @package _global_ + +# MPNN model with the PCQMv2 dataset on IPU. +constants: + name: pcqm4mv2_mpnn_4layer + entity: "multitask-gnn" + seed: 42 + max_epochs: 100 + raise_train_error: true # Whether the code should raise an error if it crashes during training + +trainer: + model_checkpoint: + dirpath: models_checkpoints/PCMQ4Mv2/ \ No newline at end of file diff --git a/expts/hydra-configs/main.yaml b/expts/hydra-configs/main.yaml index 198bccb0c..903b7fe1d 100644 --- a/expts/hydra-configs/main.yaml +++ b/expts/hydra-configs/main.yaml @@ -1,7 +1,7 @@ defaults: - accelerator: ipu - - dataset: toymix - - model: gcn + - dataset: pcqm4m + - model: mpnn # Specializations - experiment: ${dataset}_${model} diff --git a/expts/hydra-configs/model/mpnn.yaml b/expts/hydra-configs/model/mpnn.yaml new file mode 100644 index 000000000..0d322ade3 --- /dev/null +++ b/expts/hydra-configs/model/mpnn.yaml @@ -0,0 +1,16 @@ +# @package _global_ + +architecture: + gnn: + layer_type: 'pyg:gps' + layer_kwargs: # Parameters for the model itself. You could define dropout_attn: 0.1 + node_residual: false + mpnn_type: 'pyg:mpnnplus' + mpnn_kwargs: + in_dim: 256 + out_dim: 256 + in_dim_edges: 128 + out_dim_edges: 128 + attn_type: "none" # "full-attention", "none" + # biased_attention: false + attn_kwargs: null \ No newline at end of file From e9c832c622290e0124cb31460466522dd9c96e11 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Thu, 27 Jul 2023 16:24:26 +0000 Subject: [PATCH 08/12] add gpspp pcqm4mv2 hydra configs --- expts/configs/config_gpspp_10M_pcqm4m.yaml | 6 +-- expts/hydra-configs/accelerator/ipu.yaml | 9 ++++- expts/hydra-configs/dataset/pcqm4m.yaml | 2 +- .../experiment/pcqm4m_gpspp.yaml | 13 +++++++ .../hydra-configs/experiment/pcqm4m_mpnn.yaml | 4 +- expts/hydra-configs/main.yaml | 2 +- expts/hydra-configs/model/gpspp.yaml | 38 +++++++++++++++++++ 7 files changed, 66 insertions(+), 8 deletions(-) create mode 100644 expts/hydra-configs/experiment/pcqm4m_gpspp.yaml create mode 100644 expts/hydra-configs/model/gpspp.yaml diff --git a/expts/configs/config_gpspp_10M_pcqm4m.yaml b/expts/configs/config_gpspp_10M_pcqm4m.yaml index 3bfaf3de2..62abd3a4a 100644 --- a/expts/configs/config_gpspp_10M_pcqm4m.yaml +++ b/expts/configs/config_gpspp_10M_pcqm4m.yaml @@ -92,7 +92,7 @@ datamodule: # 'num_chiral_centers (not included yet)'] atom_property_list_onehot: [atomic-number, group, period, total-valence] atom_property_list_float: [degree, formal-charge, radical-electron, aromatic, in-ring] - conformer_property_list: [positions_3d] # 3D_bias + conformer_property_list: [positions_3d] # OGB: ['possible_bond_type', 'possible_bond_stereo', 'possible_is_in_ring'] edge_property_list: [bond-type-onehot, stereo, in-ring] add_self_loop: False @@ -175,7 +175,7 @@ architecture: dropout: 0.1 normalization: "layer_norm" #"batch_norm" or "layer_norm" first_normalization: "layer_norm" #"batch_norm" or "layer_norm" - gaussian_pos: # 3D_bias + gaussian_pos: encoder_type: "gaussian_kernel" input_keys: ["positions_3d"] output_keys: ["feat", "nodepair_gaussian_bias_3d"] @@ -209,7 +209,7 @@ architecture: out_dim_edges: 128 attn_type: "full-attention" # "full-attention", "none" precision: &precision 16-true - 
biased_attention_key: "nodepair_gaussian_bias_3d" # 3D_bias + biased_attention_key: "nodepair_gaussian_bias_3d" attn_kwargs: num_heads: 32 droppath_rate_attn: 0.0 diff --git a/expts/hydra-configs/accelerator/ipu.yaml b/expts/hydra-configs/accelerator/ipu.yaml index b7075af97..372297e4c 100644 --- a/expts/hydra-configs/accelerator/ipu.yaml +++ b/expts/hydra-configs/accelerator/ipu.yaml @@ -6,4 +6,11 @@ ipu_config: # - enableExecutableCaching("pop_compiler_cache") - TensorLocations.numIOTiles(128) - _Popart.set("defaultBufferingDepth", 96) - - Precision.enableStochasticRounding(True) \ No newline at end of file + - Precision.enableStochasticRounding(True) + +ipu_inference_config: + # set device iteration and replication factor to 1 during inference + # gradient accumulation was set to 1 in the code + - deviceIterations(1) + - replicationFactor(1) + - Precision.enableStochasticRounding(False) \ No newline at end of file diff --git a/expts/hydra-configs/dataset/pcqm4m.yaml b/expts/hydra-configs/dataset/pcqm4m.yaml index 46f36e00d..41194ee2f 100644 --- a/expts/hydra-configs/dataset/pcqm4m.yaml +++ b/expts/hydra-configs/dataset/pcqm4m.yaml @@ -13,7 +13,7 @@ datamodule: # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly smiles_col: "cxsmiles" label_cols: ["homo_lumo_gap"] - # sample_size: 8000 # use sample_size for test + sample_size: 8000 # use sample_size for test splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` split_names: ["train", "valid", "test-dev"] # graphium/data/PCQM4Mv2/split_dict.pt diff --git a/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml b/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml new file mode 100644 index 000000000..d0e4e3a96 --- /dev/null +++ b/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml @@ -0,0 +1,13 @@ +# @package _global_ + +# GPS++ model with the PCQMv2 dataset. +constants: + name: pcqm4mv2_gpspp_4layer + entity: "multitask-gnn" + seed: 42 + max_epochs: 100 + raise_train_error: true # Whether the code should raise an error if it crashes during training + +trainer: + model_checkpoint: + dirpath: models_checkpoints/PCMQ4Mv2/gpspp/ \ No newline at end of file diff --git a/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml index 037f5e967..28eda3bdf 100644 --- a/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml +++ b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml @@ -1,6 +1,6 @@ # @package _global_ -# MPNN model with the PCQMv2 dataset on IPU. +# MPNN model with the PCQMv2 dataset. 
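
The hydra-configs introduced in PATCH 07 and extended here compose through the defaults list in expts/hydra-configs/main.yaml: each group file begins with `# @package _global_`, so its contents are merged at the config root, and the `experiment: ${dataset}_${model}` entry resolves to the matching specialization (pcqm4m_mpnn or pcqm4m_gpspp). A minimal, illustrative way to inspect the composed result with Hydra's compose API (run from the expts/ directory; not part of the patch):

    from hydra import initialize, compose
    from omegaconf import OmegaConf

    # Select the GPS++ model; dataset=pcqm4m and accelerator=ipu are the defaults.
    with initialize(version_base=None, config_path="hydra-configs"):
        cfg = compose(config_name="main", overrides=["model=gpspp"])

    print(OmegaConf.to_yaml(cfg.constants))  # name: pcqm4mv2_gpspp_4layer, seed: 42, ...
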
constants: name: pcqm4mv2_mpnn_4layer entity: "multitask-gnn" @@ -10,4 +10,4 @@ constants: trainer: model_checkpoint: - dirpath: models_checkpoints/PCMQ4Mv2/ \ No newline at end of file + dirpath: models_checkpoints/PCMQ4Mv2/mpnn/ \ No newline at end of file diff --git a/expts/hydra-configs/main.yaml b/expts/hydra-configs/main.yaml index 903b7fe1d..e5a78fdfc 100644 --- a/expts/hydra-configs/main.yaml +++ b/expts/hydra-configs/main.yaml @@ -1,7 +1,7 @@ defaults: - accelerator: ipu - dataset: pcqm4m - - model: mpnn + - model: gpspp # Specializations - experiment: ${dataset}_${model} diff --git a/expts/hydra-configs/model/gpspp.yaml b/expts/hydra-configs/model/gpspp.yaml new file mode 100644 index 000000000..0b231fcf1 --- /dev/null +++ b/expts/hydra-configs/model/gpspp.yaml @@ -0,0 +1,38 @@ +# @package _global_ + +architecture: + pe_encoders: + encoders: + gaussian_pos: + encoder_type: "gaussian_kernel" + input_keys: ["positions_3d"] + output_keys: ["feat", "nodepair_gaussian_bias_3d"] + num_heads: 32 + num_layers: 1 #2 + embed_dim: 32 + out_dim: 32 # need num of gaussian kernels 128 + # but currently it checks pe_out_dim == pe_out_dim in encoder_manager.py, line 128 + use_input_keys_prefix: False + + gnn: + layer_type: 'pyg:gps' + layer_kwargs: # Parameters for the model itself. You could define dropout_attn: 0.1 + node_residual: false + mpnn_type: 'pyg:mpnnplus' + mpnn_kwargs: + in_dim: 256 + out_dim: 256 + in_dim_edges: 128 + out_dim_edges: 128 + attn_type: "full-attention" # "full-attention", "none" + precision: &precision 16-true + biased_attention_key: "nodepair_gaussian_bias_3d" # 3D_bias + attn_kwargs: + num_heads: 32 + droppath_rate_attn: 0.0 + droppath_rate_ffn: 0.0 + +datamodule: + args: # Matches that in the test_multitask_datamodule.py case. + featurization: + conformer_property_list: [positions_3d] From 1c933b5385c0fef06579764a55973126f91229e4 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Thu, 27 Jul 2023 16:46:00 +0000 Subject: [PATCH 09/12] remove original configs --- expts/configs/config_gpspp_10M_pcqm4m.yaml | 307 --------------------- expts/configs/config_mpnn_10M_pcqm4m.yaml | 292 -------------------- expts/hydra-configs/main.yaml | 2 +- 3 files changed, 1 insertion(+), 600 deletions(-) delete mode 100644 expts/configs/config_gpspp_10M_pcqm4m.yaml delete mode 100644 expts/configs/config_mpnn_10M_pcqm4m.yaml diff --git a/expts/configs/config_gpspp_10M_pcqm4m.yaml b/expts/configs/config_gpspp_10M_pcqm4m.yaml deleted file mode 100644 index 62abd3a4a..000000000 --- a/expts/configs/config_gpspp_10M_pcqm4m.yaml +++ /dev/null @@ -1,307 +0,0 @@ -# GPS++ model with the PCQMv2 dataset on IPU. 
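
The gaussian_pos encoder in model/gpspp.yaml above turns the `positions_3d` conformer feature into per-node-pair attention biases (`nodepair_gaussian_bias_3d`). The underlying idea, in the Gaussian-kernel family used by GPS++-style models, is to expand each pairwise distance onto a set of Gaussian basis functions; a rough numpy sketch of that expansion (the learned means, widths and the projection to one bias per attention head live in the encoder itself):

    import numpy as np

    def gaussian_distance_expansion(pos: np.ndarray, num_kernels: int = 32, max_dist: float = 10.0) -> np.ndarray:
        # pos: [num_nodes, 3] conformer coordinates.
        dist = np.linalg.norm(pos[:, None, :] - pos[None, :, :], axis=-1)    # [N, N]
        mu = np.linspace(0.0, max_dist, num_kernels)                         # kernel centres
        sigma = max_dist / num_kernels                                       # fixed width for the sketch
        return np.exp(-0.5 * ((dist[..., None] - mu) / sigma) ** 2)          # [N, N, num_kernels]
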
-constants: - name: &name pcqm4mv2_gpspp_4layer - seed: &seed 42 - raise_train_error: true # Whether the code should raise an error if it crashes during training - entity: multitask-gnn - -accelerator: - type: ipu # cpu or ipu or gpu - config_override: - datamodule: - args: - ipu_dataloader_training_opts: - mode: async - max_num_nodes_per_graph: 20 # train max nodes: 20, max_edges: 54 - max_num_edges_per_graph: 60 - ipu_dataloader_inference_opts: - mode: async - max_num_nodes_per_graph: 30 # valid max nodes: 51, max_edges: 118 - max_num_edges_per_graph: 120 - # Data handling-related - batch_size_training: 16 - batch_size_inference: 16 - predictor: - metrics_every_n_train_steps: 1000 - optim_kwargs: - loss_scaling: 1024 - trainer: - trainer: - precision: 16-true - accumulate_grad_batches: 4 - - ipu_config: - - deviceIterations(20) # IPU would require large batches to be ready for the model. - - replicationFactor(16) - # - enableProfiling("graph_analyser") # The folder where the profile will be stored - # - enableExecutableCaching("pop_compiler_cache") - - TensorLocations.numIOTiles(128) - - _Popart.set("defaultBufferingDepth", 128) - - Precision.enableStochasticRounding(True) - - ipu_inference_config: - # set device iteration and replication factor to 1 during inference - # gradient accumulation was set to 1 in the code - - deviceIterations(1) - - replicationFactor(1) - - Precision.enableStochasticRounding(False) - -# accelerator: -# type: cpu # cpu or ipu or gpu -# config_override: -# args: -# datamodule: -# batch_size_training: 256 -# batch_size_inference: 64 -# trainer: -# trainer: -# precision: 32 -# accumulate_grad_batches: 1 - -datamodule: - module_type: "MultitaskFromSmilesDataModule" - # module_type: "FakeDataModule" # Option to use generated data - args: # Matches that in the test_multitask_datamodule.py case. 
- task_specific_args: # To be replaced by a new class "DatasetParams" - homolumo: - df: null - task_level: "graph" - df_path: graphium/data/PCQM4M/pcqm4mv2.csv #graphium/data/PCQM4Mv2/pcqm4mv2.csv - # wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv - # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly - smiles_col: "cxsmiles" - label_cols: ["homo_lumo_gap"] - # sample_size: 8000 # use sample_size for test - splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` - # graphium/data/PCQM4Mv2/split_dict.pt - # graphium/data/PCQM4Mv2/pcqm4m_split.csv - split_names: ["train", "valid", "test-dev"] - seed: *seed - label_normalization: - method: "normal" - - # Featurization - prepare_dict_or_graph: pyg:graph - featurization_n_jobs: 30 - featurization_progress: True - featurization_backend: "loky" - processed_graph_data_path: "../datacache/PCQM4Mv2/" - featurization: - # OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence), - # 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring', - # 'num_chiral_centers (not included yet)'] - atom_property_list_onehot: [atomic-number, group, period, total-valence] - atom_property_list_float: [degree, formal-charge, radical-electron, aromatic, in-ring] - conformer_property_list: [positions_3d] - # OGB: ['possible_bond_type', 'possible_bond_stereo', 'possible_is_in_ring'] - edge_property_list: [bond-type-onehot, stereo, in-ring] - add_self_loop: False - explicit_H: False # if H is included - use_bonds_weights: False - pos_encoding_as_features: # encoder dropout 0.18 - pos_types: - lap_eigvec: - pos_level: node - pos_type: laplacian_eigvec - num_pos: 8 - normalization: "none" # nomrlization already applied on the eigen vectors - disconnected_comp: True # if eigen values/vector for disconnected graph are included - lap_eigval: - pos_level: node - pos_type: laplacian_eigval - num_pos: 8 - normalization: "none" # nomrlization already applied on the eigen vectors - disconnected_comp: True # if eigen values/vector for disconnected graph are included - rw_pos: # use same name as pe_encoder - pos_level: node - pos_type: rw_return_probs - ksteps: 16 - - - # cache_data_path: . - num_workers: 30 # -1 to use all - persistent_workers: False # if use persistent worker at the start of each epoch. - # Using persistent_workers false might make the start of each epoch very long. 
- - -architecture: - model_type: FullGraphMultiTaskNetwork - mup_base_path: null - pre_nn: # Set as null to avoid a pre-nn network - out_dim: 256 - hidden_dims: 1024 - depth: 2 - activation: relu - last_activation: none - dropout: &dropout 0.18 - normalization: &normalization layer_norm - last_normalization: *normalization - residual_type: none - - pre_nn_edges: # Set as null to avoid a pre-nn network - out_dim: 128 - hidden_dims: 512 - depth: 2 - activation: relu - last_activation: none - dropout: *dropout - normalization: *normalization - last_normalization: *normalization - residual_type: none - - pe_encoders: - out_dim: 32 - pool: "sum" #"mean" "max" - last_norm: None #"batch_norm", "layer_norm" - encoders: #la_pos | rw_pos - la_pos: # Set as null to avoid a pre-nn network - encoder_type: "laplacian_pe" - input_keys: ["laplacian_eigvec", "laplacian_eigval"] - output_keys: ["feat"] - hidden_dim: 64 - out_dim: 32 - model_type: 'DeepSet' #'Transformer' or 'DeepSet' - num_layers: 2 - num_layers_post: 1 # Num. layers to apply after pooling - dropout: 0.1 - first_normalization: "none" #"batch_norm" or "layer_norm" - rw_pos: - encoder_type: "mlp" - input_keys: ["rw_return_probs"] - output_keys: ["feat"] - hidden_dim: 64 - out_dim: 32 - num_layers: 2 - dropout: 0.1 - normalization: "layer_norm" #"batch_norm" or "layer_norm" - first_normalization: "layer_norm" #"batch_norm" or "layer_norm" - gaussian_pos: - encoder_type: "gaussian_kernel" - input_keys: ["positions_3d"] - output_keys: ["feat", "nodepair_gaussian_bias_3d"] - num_heads: 32 - num_layers: 1 #2 - embed_dim: 32 - out_dim: 32 # need num of gaussian kernels 128 - # but currently it checks pe_out_dim == pe_out_dim in encoder_manager.py, line 128 - use_input_keys_prefix: False - - - gnn: # Set as null to avoid a post-nn network - out_dim: 256 - hidden_dims: 256 - depth: 4 - activation: gelu - last_activation: none - dropout: 0.1 - normalization: "layer_norm" - last_normalization: *normalization - residual_type: simple - virtual_node: 'none' - layer_type: 'pyg:gps' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps - layer_kwargs: # Parameters for the model itself. 
You could define dropout_attn: 0.1 - node_residual: false - mpnn_type: 'pyg:mpnnplus' - mpnn_kwargs: - in_dim: 256 - out_dim: 256 - in_dim_edges: 128 - out_dim_edges: 128 - attn_type: "full-attention" # "full-attention", "none" - precision: &precision 16-true - biased_attention_key: "nodepair_gaussian_bias_3d" - attn_kwargs: - num_heads: 32 - droppath_rate_attn: 0.0 - droppath_rate_ffn: 0.0 - - - graph_output_nn: - graph: - pooling: [sum] - out_dim: 256 - hidden_dims: 256 - depth: 1 - activation: relu - last_activation: none - dropout: *dropout - normalization: *normalization - last_normalization: "none" - residual_type: none - - task_heads: - homolumo: - task_level: graph - out_dim: 1 - hidden_dims: 256 - depth: 2 # Not needed if we have hidden_dims - activation: relu - last_activation: none - dropout: *dropout - normalization: *normalization - last_normalization: "none" - residual_type: none - -#Task-specific -predictor: - metrics_on_progress_bar: - homolumo: [] - metrics_on_training_set: - homolumo: ["pearsonr"] - loss_fun: - homolumo: mae_ipu - random_seed: *seed - optim_kwargs: - lr: 4.e-4 # warmup can be scheduled using torch_scheduler_kwargs - # weight_decay: 1.e-7 - # loss_scaling: 1024 - torch_scheduler_kwargs: - module_type: WarmUpLinearLR - max_num_epochs: &max_epochs 100 - warmup_epochs: 10 - verbose: False - scheduler_kwargs: - # monitor: &monitor homolumo/mae/train - # mode: min - # frequency: 1 - target_nan_mask: null # null: no mask, 0: 0 mask, ignore: ignore nan values from loss - flag_kwargs: - n_steps: 0 # 1 - alpha: 0.0 # 0.01 - -# Task-specific -metrics: - homolumo: - - name: mae - metric: mae_ipu - target_nan_mask: null - multitask_handling: mean-per-label - threshold_kwargs: null - - name: pearsonr - metric: pearsonr_ipu - threshold_kwargs: null - target_nan_mask: null - multitask_handling: mean-per-label - -trainer: - seed: *seed - logger: - save_dir: logs/PCQMv2 - name: *name - project: PCQMv2_gpspp - #early_stopping: - # monitor: *monitor - # min_delta: 0 - # patience: 10 - # mode: &mode min - model_checkpoint: - dirpath: models_checkpoints/PCMQ4Mv2/ - filename: *name - #monitor: *monitor - #mode: *mode - save_top_k: 1 - every_n_epochs: 100 - trainer: - max_epochs: *max_epochs - min_epochs: 1 - check_val_every_n_epoch: 20 \ No newline at end of file diff --git a/expts/configs/config_mpnn_10M_pcqm4m.yaml b/expts/configs/config_mpnn_10M_pcqm4m.yaml deleted file mode 100644 index 2a4c18b5b..000000000 --- a/expts/configs/config_mpnn_10M_pcqm4m.yaml +++ /dev/null @@ -1,292 +0,0 @@ -# MPNN model with the PCQMv2 dataset on IPU. -constants: - name: &name pcqm4mv2_mpnn_4layer - seed: &seed 42 - raise_train_error: true # Whether the code should raise an error if it crashes during training - entity: multitask-gnn - -accelerator: - type: ipu # cpu or ipu or gpu - config_override: - datamodule: - args: - ipu_dataloader_training_opts: - mode: async - max_num_nodes_per_graph: 16 # train max nodes: 20, max_edges: 54 - max_num_edges_per_graph: 60 - ipu_dataloader_inference_opts: - mode: async - max_num_nodes_per_graph: 30 # valid max nodes: 51, max_edges: 118 - max_num_edges_per_graph: 120 - # Data handling-related - batch_size_training: 64 - batch_size_inference: 16 - predictor: - metrics_every_n_train_steps: 1000 - optim_kwargs: - loss_scaling: 1024 - trainer: - trainer: - precision: 16-true - accumulate_grad_batches: 4 - - ipu_config: - - deviceIterations(20) # IPU would require large batches to be ready for the model. 
- - replicationFactor(16) - # - enableProfiling("graph_analyser") # The folder where the profile will be stored - # - enableExecutableCaching("pop_compiler_cache") - - TensorLocations.numIOTiles(128) - - _Popart.set("defaultBufferingDepth", 128) - - Precision.enableStochasticRounding(True) - - ipu_inference_config: - # set device iteration and replication factor to 1 during inference - # gradient accumulation was set to 1 in the code - - deviceIterations(1) - - replicationFactor(1) - - Precision.enableStochasticRounding(False) - -# accelerator: -# type: cpu # cpu or ipu or gpu -# config_override: -# args: -# datamodule: -# batch_size_training: 256 -# batch_size_inference: 64 -# trainer: -# trainer: -# precision: 32 -# accumulate_grad_batches: 1 - -datamodule: - module_type: "MultitaskFromSmilesDataModule" - # module_type: "FakeDataModule" # Option to use generated data - args: # Matches that in the test_multitask_datamodule.py case. - task_specific_args: # To be replaced by a new class "DatasetParams" - homolumo: - df: null - task_level: "graph" - df_path: graphium/data/PCQM4M/pcqm4mv2.csv - # wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv - # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly - smiles_col: "cxsmiles" - label_cols: ["homo_lumo_gap"] - # sample_size: 8000 # use sample_size for test - splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` - split_names: ["train", "valid", "test-dev"] - # graphium/data/PCQM4Mv2/split_dict.pt - # graphium/data/PCQM4Mv2/pcqm4m_split.csv - # split_val: 0.1 - # split_test: 0.1 - seed: *seed - label_normalization: - method: "normal" - - # Featurization - prepare_dict_or_graph: pyg:graph - featurization_n_jobs: 30 - featurization_progress: True - featurization_backend: "loky" - processed_graph_data_path: "../datacache/PCQM4Mv2/" - featurization: - # OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence), - # 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring', - # 'num_chiral_centers (not included yet)'] - atom_property_list_onehot: [atomic-number, group, period, total-valence] - atom_property_list_float: [degree, formal-charge, radical-electron, aromatic, in-ring] - # OGB: ['possible_bond_type', 'possible_bond_stereo', 'possible_is_in_ring'] - edge_property_list: [bond-type-onehot, stereo, in-ring] - add_self_loop: False - explicit_H: False # if H is included - use_bonds_weights: False - pos_encoding_as_features: # encoder dropout 0.18 - pos_types: - lap_eigvec: - pos_level: node - pos_type: laplacian_eigvec - num_pos: 8 - normalization: "none" # nomrlization already applied on the eigen vectors - disconnected_comp: True # if eigen values/vector for disconnected graph are included - lap_eigval: - pos_level: node - pos_type: laplacian_eigval - num_pos: 8 - normalization: "none" # nomrlization already applied on the eigen vectors - disconnected_comp: True # if eigen values/vector for disconnected graph are included - rw_pos: # use same name as pe_encoder - pos_level: node - pos_type: rw_return_probs - ksteps: 16 - - # cache_data_path: . - num_workers: 30 # -1 to use all - persistent_workers: False # if use persistent worker at the start of each epoch. - # Using persistent_workers false might make the start of each epoch very long. 
- -architecture: - model_type: FullGraphMultiTaskNetwork - mup_base_path: null - pre_nn: # Set as null to avoid a pre-nn network - out_dim: 256 - hidden_dims: 1024 - depth: 2 - activation: relu - last_activation: none - dropout: &dropout 0.18 - normalization: &normalization layer_norm - last_normalization: *normalization - residual_type: none - - pre_nn_edges: # Set as null to avoid a pre-nn network - out_dim: 128 - hidden_dims: 512 - depth: 2 - activation: relu - last_activation: none - dropout: *dropout - normalization: *normalization - last_normalization: *normalization - residual_type: none - - pe_encoders: - out_dim: 32 - pool: "sum" #"mean" "max" - last_norm: None #"batch_norm", "layer_norm" - encoders: #la_pos | rw_pos - la_pos: # Set as null to avoid a pre-nn network - encoder_type: "laplacian_pe" - input_keys: ["laplacian_eigvec", "laplacian_eigval"] - output_keys: ["feat"] - hidden_dim: 64 - out_dim: 32 - model_type: 'DeepSet' #'Transformer' or 'DeepSet' - num_layers: 2 - num_layers_post: 1 # Num. layers to apply after pooling - dropout: 0.1 - first_normalization: "none" #"batch_norm" or "layer_norm" - rw_pos: - encoder_type: "mlp" - input_keys: ["rw_return_probs"] - output_keys: ["feat"] - hidden_dim: 64 - out_dim: 32 - num_layers: 2 - dropout: 0.1 - normalization: "layer_norm" #"batch_norm" or "layer_norm" - first_normalization: "layer_norm" #"batch_norm" or "layer_norm" - - - - gnn: # Set as null to avoid a post-nn network - out_dim: 256 - hidden_dims: 256 - depth: 4 - activation: gelu - last_activation: none - dropout: 0.1 - normalization: "layer_norm" - last_normalization: *normalization - residual_type: simple - virtual_node: 'none' - layer_type: 'pyg:gps' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps - layer_kwargs: # Parameters for the model itself. 
You could define dropout_attn: 0.1 - node_residual: false - mpnn_type: 'pyg:mpnnplus' - mpnn_kwargs: - in_dim: 256 - out_dim: 256 - in_dim_edges: 128 - out_dim_edges: 128 - attn_type: "none" # "full-attention", "none" - # biased_attention: false - attn_kwargs: null - - - graph_output_nn: - graph: - pooling: [sum] - out_dim: 256 - hidden_dims: 256 - depth: 1 - activation: relu - last_activation: none - dropout: *dropout - normalization: *normalization - last_normalization: "none" - residual_type: none - - task_heads: - homolumo: - task_level: graph - out_dim: 1 - hidden_dims: 256 - depth: 2 # Not needed if we have hidden_dims - activation: relu - last_activation: none - dropout: *dropout - normalization: *normalization - last_normalization: "none" - residual_type: none - -#Task-specific -predictor: - metrics_on_progress_bar: - homolumo: [] - metrics_on_training_set: - homolumo: ["pearsonr"] - loss_fun: - homolumo: mae_ipu - random_seed: *seed - optim_kwargs: - lr: 4.e-4 # warmup can be scheduled using torch_scheduler_kwargs - # weight_decay: 1.e-7 - torch_scheduler_kwargs: - module_type: WarmUpLinearLR - max_num_epochs: &max_epochs 100 - warmup_epochs: 10 - verbose: False - scheduler_kwargs: - # monitor: &monitor homolumo/mae/train - # mode: min - # frequency: 1 - target_nan_mask: null # null: no mask, 0: 0 mask, ignore: ignore nan values from loss - flag_kwargs: - n_steps: 0 # 1 - alpha: 0.0 # 0.01 - -# Task-specific -metrics: - homolumo: - - name: mae - metric: mae_ipu - target_nan_mask: null - multitask_handling: mean-per-label - threshold_kwargs: null - - name: pearsonr - metric: pearsonr_ipu - threshold_kwargs: null - target_nan_mask: null - multitask_handling: mean-per-label - -trainer: - seed: *seed - logger: - save_dir: logs/PCQMv2 - name: *name - project: PCQMv2_mpnn - #early_stopping: - # monitor: *monitor - # min_delta: 0 - # patience: 10 - # mode: &mode min - model_checkpoint: - dirpath: models_checkpoints/PCMQ4Mv2/ - filename: *name - #monitor: *monitor - #mode: *mode - save_top_k: 1 - every_n_epochs: 100 - trainer: - max_epochs: *max_epochs - min_epochs: 1 - check_val_every_n_epoch: 20 diff --git a/expts/hydra-configs/main.yaml b/expts/hydra-configs/main.yaml index e5a78fdfc..903b7fe1d 100644 --- a/expts/hydra-configs/main.yaml +++ b/expts/hydra-configs/main.yaml @@ -1,7 +1,7 @@ defaults: - accelerator: ipu - dataset: pcqm4m - - model: gpspp + - model: mpnn # Specializations - experiment: ${dataset}_${model} From 96a4d913187622df25ee0dae5d0eec07779c4c33 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Fri, 28 Jul 2023 08:54:56 +0000 Subject: [PATCH 10/12] minor change --- expts/hydra-configs/accelerator/ipu.yaml | 3 ++- expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml | 3 ++- expts/hydra-configs/dataset/pcqm4m.yaml | 5 +++-- expts/hydra-configs/experiment/pcqm4m_gpspp.yaml | 3 ++- expts/hydra-configs/experiment/pcqm4m_mpnn.yaml | 3 ++- expts/hydra-configs/main.yaml | 4 ++-- expts/hydra-configs/model/mpnn.yaml | 3 ++- 7 files changed, 15 insertions(+), 9 deletions(-) diff --git a/expts/hydra-configs/accelerator/ipu.yaml b/expts/hydra-configs/accelerator/ipu.yaml index 372297e4c..8fb9b07f6 100644 --- a/expts/hydra-configs/accelerator/ipu.yaml +++ b/expts/hydra-configs/accelerator/ipu.yaml @@ -13,4 +13,5 @@ ipu_inference_config: # gradient accumulation was set to 1 in the code - deviceIterations(1) - replicationFactor(1) - - Precision.enableStochasticRounding(False) \ No newline at end of file + - Precision.enableStochasticRounding(False) + \ No newline at end of file 
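The `ipu_config` / `ipu_inference_config` entries above are written as poptorch option calls in string form. For readers unfamiliar with that API, the inference entries correspond roughly to the direct calls sketched below; this mapping is an assumption about intent, not Graphium's actual option-parsing code.

```python
# Rough poptorch equivalent of the ipu_inference_config entries (assumption).
import poptorch

opts = poptorch.Options()
opts.deviceIterations(1)                        # one device iteration per step at inference
opts.replicationFactor(1)                       # no replication at inference
opts.Precision.enableStochasticRounding(False)  # deterministic rounding for evaluation
```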
diff --git a/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml index ebe372605..010327262 100644 --- a/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml +++ b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml @@ -22,4 +22,5 @@ predictor: trainer: trainer: precision: 16-true - accumulate_grad_batches: 4 \ No newline at end of file + accumulate_grad_batches: 4 + \ No newline at end of file diff --git a/expts/hydra-configs/dataset/pcqm4m.yaml b/expts/hydra-configs/dataset/pcqm4m.yaml index 41194ee2f..411287929 100644 --- a/expts/hydra-configs/dataset/pcqm4m.yaml +++ b/expts/hydra-configs/dataset/pcqm4m.yaml @@ -13,7 +13,7 @@ datamodule: # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly smiles_col: "cxsmiles" label_cols: ["homo_lumo_gap"] - sample_size: 8000 # use sample_size for test + # sample_size: 8000 # use sample_size for test splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` split_names: ["train", "valid", "test-dev"] # graphium/data/PCQM4Mv2/split_dict.pt @@ -218,4 +218,5 @@ trainer: trainer: max_epochs: *max_epochs min_epochs: 1 - check_val_every_n_epoch: 20 \ No newline at end of file + check_val_every_n_epoch: 20 + \ No newline at end of file diff --git a/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml b/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml index d0e4e3a96..d8fceb8fc 100644 --- a/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml +++ b/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml @@ -10,4 +10,5 @@ constants: trainer: model_checkpoint: - dirpath: models_checkpoints/PCMQ4Mv2/gpspp/ \ No newline at end of file + dirpath: models_checkpoints/PCMQ4Mv2/gpspp/ + \ No newline at end of file diff --git a/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml index 28eda3bdf..6621137f8 100644 --- a/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml +++ b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml @@ -10,4 +10,5 @@ constants: trainer: model_checkpoint: - dirpath: models_checkpoints/PCMQ4Mv2/mpnn/ \ No newline at end of file + dirpath: models_checkpoints/PCMQ4Mv2/mpnn/ + \ No newline at end of file diff --git a/expts/hydra-configs/main.yaml b/expts/hydra-configs/main.yaml index 903b7fe1d..198bccb0c 100644 --- a/expts/hydra-configs/main.yaml +++ b/expts/hydra-configs/main.yaml @@ -1,7 +1,7 @@ defaults: - accelerator: ipu - - dataset: pcqm4m - - model: mpnn + - dataset: toymix + - model: gcn # Specializations - experiment: ${dataset}_${model} diff --git a/expts/hydra-configs/model/mpnn.yaml b/expts/hydra-configs/model/mpnn.yaml index 0d322ade3..0ac1432ed 100644 --- a/expts/hydra-configs/model/mpnn.yaml +++ b/expts/hydra-configs/model/mpnn.yaml @@ -13,4 +13,5 @@ architecture: out_dim_edges: 128 attn_type: "none" # "full-attention", "none" # biased_attention: false - attn_kwargs: null \ No newline at end of file + attn_kwargs: null + \ No newline at end of file From 83aa388d2647629fe2ddecff018b62d55bf94ca9 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Fri, 28 Jul 2023 08:58:41 +0000 Subject: [PATCH 11/12] remove space for new line --- expts/hydra-configs/accelerator/ipu.yaml | 1 - expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml | 1 - expts/hydra-configs/dataset/pcqm4m.yaml | 1 - expts/hydra-configs/experiment/pcqm4m_gpspp.yaml | 1 - expts/hydra-configs/experiment/pcqm4m_mpnn.yaml 
| 1 - expts/hydra-configs/model/mpnn.yaml | 1 - 6 files changed, 6 deletions(-) diff --git a/expts/hydra-configs/accelerator/ipu.yaml b/expts/hydra-configs/accelerator/ipu.yaml index 8fb9b07f6..43e4455ef 100644 --- a/expts/hydra-configs/accelerator/ipu.yaml +++ b/expts/hydra-configs/accelerator/ipu.yaml @@ -14,4 +14,3 @@ ipu_inference_config: - deviceIterations(1) - replicationFactor(1) - Precision.enableStochasticRounding(False) - \ No newline at end of file diff --git a/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml index 010327262..31cba7fc3 100644 --- a/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml +++ b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml @@ -23,4 +23,3 @@ trainer: trainer: precision: 16-true accumulate_grad_batches: 4 - \ No newline at end of file diff --git a/expts/hydra-configs/dataset/pcqm4m.yaml b/expts/hydra-configs/dataset/pcqm4m.yaml index 411287929..391bb21de 100644 --- a/expts/hydra-configs/dataset/pcqm4m.yaml +++ b/expts/hydra-configs/dataset/pcqm4m.yaml @@ -219,4 +219,3 @@ trainer: max_epochs: *max_epochs min_epochs: 1 check_val_every_n_epoch: 20 - \ No newline at end of file diff --git a/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml b/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml index d8fceb8fc..a321e835d 100644 --- a/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml +++ b/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml @@ -11,4 +11,3 @@ constants: trainer: model_checkpoint: dirpath: models_checkpoints/PCMQ4Mv2/gpspp/ - \ No newline at end of file diff --git a/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml index 6621137f8..08b1b1f3c 100644 --- a/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml +++ b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml @@ -11,4 +11,3 @@ constants: trainer: model_checkpoint: dirpath: models_checkpoints/PCMQ4Mv2/mpnn/ - \ No newline at end of file diff --git a/expts/hydra-configs/model/mpnn.yaml b/expts/hydra-configs/model/mpnn.yaml index 0ac1432ed..4a8a428e8 100644 --- a/expts/hydra-configs/model/mpnn.yaml +++ b/expts/hydra-configs/model/mpnn.yaml @@ -14,4 +14,3 @@ architecture: attn_type: "none" # "full-attention", "none" # biased_attention: false attn_kwargs: null - \ No newline at end of file From f60b9688cf60a5c9674de9eff195a69aec988ad4 Mon Sep 17 00:00:00 2001 From: Zhiyi Li <86362692+zhiyil-graphcore@users.noreply.github.com> Date: Fri, 28 Jul 2023 14:55:04 +0100 Subject: [PATCH 12/12] Update pcqm4m_ipu.yaml --- expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml index 31cba7fc3..6502f9414 100644 --- a/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml +++ b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml @@ -11,7 +11,7 @@ datamodule: max_num_nodes_per_graph: 30 # valid max nodes: 51, max_edges: 118 max_num_edges_per_graph: 120 # Data handling-related - batch_size_training: 64 + batch_size_training: 32 batch_size_inference: 16 predictor: @@ -22,4 +22,4 @@ predictor: trainer: trainer: precision: 16-true - accumulate_grad_batches: 4 + accumulate_grad_batches: 2
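The final commit halves both `batch_size_training` (64 to 32) and `accumulate_grad_batches` (4 to 2) for the PCQM4M IPU setup. Under the usual IPU convention that one weight update consumes micro-batch x gradient-accumulation x replication-factor samples, and assuming the replication factor of 16 from the earlier removed training config still applies, the numbers work out as in the sketch below; the replication value is an assumption, not something set in this patch.

```python
# Back-of-the-envelope effective batch size after this commit (assumptions noted above).
micro_batch = 32       # batch_size_training
accumulation = 2       # accumulate_grad_batches
replication = 16       # assumed training replicationFactor
print(micro_batch * accumulation * replication)  # 1024 samples per weight update
```

Under the same assumptions, the previous values (64 and 4) gave 4096 samples per update, so this change reduces the effective batch size by a factor of four.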