From 70841984211bfdeaaa51fba41d642c153d6b695a Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Fri, 21 Jul 2023 09:46:19 +0000 Subject: [PATCH 01/12] pcqm4m changes --- expts/configs/config_mpnn_10M_pcqm4m.yaml | 15 +++++++-------- graphium/data/datamodule.py | 9 ++++++--- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/expts/configs/config_mpnn_10M_pcqm4m.yaml b/expts/configs/config_mpnn_10M_pcqm4m.yaml index 63fab1970..2d648fd1f 100644 --- a/expts/configs/config_mpnn_10M_pcqm4m.yaml +++ b/expts/configs/config_mpnn_10M_pcqm4m.yaml @@ -56,23 +56,21 @@ datamodule: homolumo: df: null task_level: "graph" - df_path: graphium/data/PCQM4M/pcqm4mv2-20k.csv + df_path: graphium/data/PCQM4M/pcqm4mv2.csv # wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly smiles_col: "cxsmiles" label_cols: ["homo_lumo_gap"] - sample_size: 8000 # use sample_size for test - # splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` - # split_names: ["train", "valid", "test-dev"] + # sample_size: 8000 # use sample_size for test + splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` + split_names: ["train", "valid", "test-dev"] # graphium/data/PCQM4Mv2/split_dict.pt # graphium/data/PCQM4Mv2/pcqm4m_split.csv - split_val: 0.1 - split_test: 0.1 + # split_val: 0.1 + # split_test: 0.1 seed: *seed label_normalization: method: "normal" - min_clipping: 0 - max_clipping: 50 # Featurization prepare_dict_or_graph: pyg:graph @@ -261,6 +259,7 @@ metrics: multitask_handling: mean-per-label trainer: + seed: *seed logger: save_dir: logs/PCQMv2 name: *name diff --git a/graphium/data/datamodule.py b/graphium/data/datamodule.py index d4c0966ed..79ab1abc9 100644 --- a/graphium/data/datamodule.py +++ b/graphium/data/datamodule.py @@ -1906,9 +1906,12 @@ def _get_split_indices( f"file type `{file_type}` for `{splits_path}` not recognised, please use .pt, .csv or .tsv" ) train, val, test = split_names - train_indices = np.asarray(splits[train].dropna()).astype("int").tolist() - val_indices = np.asarray(splits[val].dropna()).astype("int").tolist() - test_indices = np.asarray(splits[test].dropna()).astype("int").tolist() + train_indices = np.asarray(splits[train]).astype("int") + train_indices = train_indices[~np.isnan(train_indices)].tolist() + val_indices = np.asarray(splits[val]).astype("int") + val_indices = val_indices[~np.isnan(val_indices)].tolist() + test_indices = np.asarray(splits[test]).astype("int") + test_indices = test_indices[~np.isnan(test_indices)].tolist() # Filter train, val and test indices _, train_idx, _ = np.intersect1d(sample_idx, train_indices, return_indices=True) From 4a9cde583f40eddb9608ff372bcbce72aaac0a36 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Tue, 25 Jul 2023 14:07:57 +0000 Subject: [PATCH 02/12] mpnn and gps++ changes --- expts/configs/config_gpspp_10M_pcqm4m.yaml | 108 +++++++++++++----- expts/configs/config_mpnn_10M_pcqm4m.yaml | 19 ++- .../neurips2023_configs/config_large_gcn.yaml | 1 + 3 files changed, 97 insertions(+), 31 deletions(-) diff --git a/expts/configs/config_gpspp_10M_pcqm4m.yaml b/expts/configs/config_gpspp_10M_pcqm4m.yaml index c6862cb06..649b9e045 100644 --- a/expts/configs/config_gpspp_10M_pcqm4m.yaml 
+++ b/expts/configs/config_gpspp_10M_pcqm4m.yaml @@ -3,8 +3,59 @@ constants: name: &name pcqm4mv2_gpspp_4layer seed: &seed 42 raise_train_error: true # Whether the code should raise an error if it crashes during training - accelerator: - type: ipu # cpu or ipu or gpu + entity: multitask-gnn + +accelerator: + type: ipu # cpu or ipu or gpu + config_override: + datamodule: + args: + ipu_dataloader_training_opts: + mode: async + max_num_nodes_per_graph: 20 # train max nodes: 20, max_edges: 54 + max_num_edges_per_graph: 60 + ipu_dataloader_inference_opts: + mode: async + max_num_nodes_per_graph: 16 # valid max nodes: 51, max_edges: 118 + max_num_edges_per_graph: 120 + # Data handling-related + batch_size_training: 16 + batch_size_inference: 16 + predictor: + optim_kwargs: + loss_scaling: 1024 + trainer: + trainer: + precision: 16-true + accumulate_grad_batches: 4 + + ipu_config: + - deviceIterations(2) # IPU would require large batches to be ready for the model. + - replicationFactor(4) + # - enableProfiling("graph_analyser") # The folder where the profile will be stored + # - enableExecutableCaching("pop_compiler_cache") + - TensorLocations.numIOTiles(128) + - _Popart.set("defaultBufferingDepth", 128) + - Precision.enableStochasticRounding(True) + + ipu_inference_config: + # set device iteration and replication factor to 1 during inference + # gradient accumulation was set to 1 in the code + - deviceIterations(1) + - replicationFactor(1) + - Precision.enableStochasticRounding(False) + +# accelerator: +# type: cpu # cpu or ipu or gpu +# config_override: +# args: +# datamodule: +# batch_size_training: 256 +# batch_size_inference: 64 +# trainer: +# trainer: +# precision: 32 +# accumulate_grad_batches: 1 datamodule: module_type: "MultitaskFromSmilesDataModule" @@ -14,20 +65,20 @@ datamodule: homolumo: df: null task_level: "graph" - df_path: graphium/data/PCQM4Mv2/pcqm4mv2.csv #graphium/data/PCQM4Mv2/pcqm4mv2.csv + df_path: graphium/data/PCQM4M/pcqm4mv2.csv #graphium/data/PCQM4Mv2/pcqm4mv2.csv # wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly smiles_col: "cxsmiles" label_cols: ["homo_lumo_gap"] - # sample_size: 80000 # use sample_size for test - splits_path: graphium/data/PCQM4Mv2/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` + sample_size: 8000 # use sample_size for test + splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` # graphium/data/PCQM4Mv2/split_dict.pt # graphium/data/PCQM4Mv2/pcqm4m_split.csv split_names: ["train", "valid", "test-dev"] label_normalization: method: "normal" - min_clipping: 0 - max_clipping: 50 + min_clipping: [0] + max_clipping: [50] # Featurization prepare_dict_or_graph: pyg:graph @@ -49,36 +100,29 @@ datamodule: use_bonds_weights: False pos_encoding_as_features: # encoder dropout 0.18 pos_types: - la_pos: &pos_enc - pos_type: laplacian_eigvec_eigval #laplacian_eigvec + lap_eigvec: + pos_level: node + pos_type: laplacian_eigvec + num_pos: 8 + normalization: "none" # nomrlization already applied on the eigen vectors + disconnected_comp: True # if eigen values/vector for disconnected graph are included + lap_eigval: + pos_level: node + pos_type: laplacian_eigval num_pos: 8 normalization: "none" # nomrlization already applied on the 
eigen vectors disconnected_comp: True # if eigen values/vector for disconnected graph are included rw_pos: # use same name as pe_encoder - pos_type: rwse + pos_level: node + pos_type: rw_return_probs ksteps: 16 - # Data handling-related - batch_size_training: 16 - batch_size_inference: 16 # cache_data_path: . num_workers: 20 # -1 to use all persistent_workers: False # if use persistent worker at the start of each epoch. # Using persistent_workers false might make the start of each epoch very long. - featurization_backend: "loky" - ipu_dataloader_training_opts: - mode: async - max_num_nodes_per_graph: 20 # train max nodes: 20, max_edges: 54 - max_num_edges_per_graph: 60 - - ipu_dataloader_inference_opts: - mode: async - max_num_nodes_per_graph: 16 # valid max nodes: 51, max_edges: 118 - max_num_edges_per_graph: 120 - # test-dev max nodes: 50, max_edges: 116 - # test-challenge max nodes: 51, max_edges: 106 architecture: model_type: FullGraphMultiTaskNetwork @@ -173,10 +217,22 @@ architecture: droppath_rate_ffn: 0.0 - post_nn: null + graph_output_nn: + graph: + pooling: [sum] + out_dim: 256 + hidden_dims: 256 + depth: 1 + activation: relu + last_activation: none + dropout: *dropout + normalization: *normalization + last_normalization: "none" + residual_type: none task_heads: homolumo: + task_level: graph out_dim: 1 hidden_dims: 256 depth: 2 # Not needed if we have hidden_dims diff --git a/expts/configs/config_mpnn_10M_pcqm4m.yaml b/expts/configs/config_mpnn_10M_pcqm4m.yaml index 2d648fd1f..ffb294cb3 100644 --- a/expts/configs/config_mpnn_10M_pcqm4m.yaml +++ b/expts/configs/config_mpnn_10M_pcqm4m.yaml @@ -3,6 +3,7 @@ constants: name: &name pcqm4mv2_mpnn_4layer seed: &seed 42 raise_train_error: true # Whether the code should raise an error if it crashes during training + entity: multitask-gnn accelerator: type: ipu # cpu or ipu or gpu @@ -37,12 +38,20 @@ accelerator: - _Popart.set("defaultBufferingDepth", 128) - Precision.enableStochasticRounding(True) + ipu_inference_config: + # set device iteration and replication factor to 1 during inference + # gradient accumulation was set to 1 in the code + - deviceIterations(1) + - replicationFactor(1) + - Precision.enableStochasticRounding(False) + # accelerator: # type: cpu # cpu or ipu or gpu # config_override: -# datamodule: -# batch_size_training: 256 -# batch_size_inference: 64 +# args: +# datamodule: +# batch_size_training: 256 +# batch_size_inference: 64 # trainer: # trainer: # precision: 32 @@ -71,6 +80,8 @@ datamodule: seed: *seed label_normalization: method: "normal" + min_clipping: [0] + max_clipping: [50] # Featurization prepare_dict_or_graph: pyg:graph @@ -112,8 +123,6 @@ datamodule: num_workers: 30 # -1 to use all persistent_workers: False # if use persistent worker at the start of each epoch. # Using persistent_workers false might make the start of each epoch very long. 
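
Note on the split handling above: PATCH 01 reworks `_get_split_indices` in graphium/data/datamodule.py so that the NaN padding found in the train/valid/test-dev split columns is filtered with numpy rather than pandas `.dropna()`, before the indices are intersected with `sample_idx`. A minimal sketch of the same idea (a hypothetical helper, not the code in the patch):

    import numpy as np

    def indices_from_split_column(column) -> list:
        # Split files can pad the shorter columns with NaN; drop the padding,
        # then cast the remaining entries to integer row indices.
        arr = np.asarray(column, dtype=float)
        return arr[~np.isnan(arr)].astype(int).tolist()
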
- featurization_backend: "loky" - architecture: model_type: FullGraphMultiTaskNetwork diff --git a/expts/neurips2023_configs/config_large_gcn.yaml b/expts/neurips2023_configs/config_large_gcn.yaml index 7745693b3..c972f855a 100644 --- a/expts/neurips2023_configs/config_large_gcn.yaml +++ b/expts/neurips2023_configs/config_large_gcn.yaml @@ -3,6 +3,7 @@ constants: name: &name neurips2023_large_data_gcn seed: &seed 42 raise_train_error: true # Whether the code should raise an error if it crashes during training + entity: multitask-gnn accelerator: type: ipu # cpu or ipu or gpu From 5bcb69c2690c1ff2d4461678fc64e0bd1828f2da Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Tue, 25 Jul 2023 15:05:07 +0000 Subject: [PATCH 03/12] gpspp tested ok --- expts/configs/config_gpspp_10M_pcqm4m.yaml | 51 ++++++++++++++++------ expts/configs/config_mpnn_10M_pcqm4m.yaml | 6 +-- 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/expts/configs/config_gpspp_10M_pcqm4m.yaml b/expts/configs/config_gpspp_10M_pcqm4m.yaml index 649b9e045..dad4ac7b2 100644 --- a/expts/configs/config_gpspp_10M_pcqm4m.yaml +++ b/expts/configs/config_gpspp_10M_pcqm4m.yaml @@ -30,8 +30,8 @@ accelerator: accumulate_grad_batches: 4 ipu_config: - - deviceIterations(2) # IPU would require large batches to be ready for the model. - - replicationFactor(4) + - deviceIterations(20) # IPU would require large batches to be ready for the model. + - replicationFactor(16) # - enableProfiling("graph_analyser") # The folder where the profile will be stored # - enableExecutableCaching("pop_compiler_cache") - TensorLocations.numIOTiles(128) @@ -70,11 +70,12 @@ datamodule: # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly smiles_col: "cxsmiles" label_cols: ["homo_lumo_gap"] - sample_size: 8000 # use sample_size for test + # sample_size: 8000 # use sample_size for test splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` # graphium/data/PCQM4Mv2/split_dict.pt # graphium/data/PCQM4Mv2/pcqm4m_split.csv split_names: ["train", "valid", "test-dev"] + seed: *seed label_normalization: method: "normal" min_clipping: [0] @@ -82,10 +83,10 @@ datamodule: # Featurization prepare_dict_or_graph: pyg:graph - featurization_n_jobs: 20 + featurization_n_jobs: 30 featurization_progress: True featurization_backend: "loky" - processed_graph_data_path: "/tmp/graphium_data/PCQM4Mv2/" + processed_graph_data_path: "../datacache/PCQM4Mv2/" featurization: # OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence), # 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring', @@ -119,7 +120,7 @@ datamodule: # cache_data_path: . - num_workers: 20 # -1 to use all + num_workers: 30 # -1 to use all persistent_workers: False # if use persistent worker at the start of each epoch. # Using persistent_workers false might make the start of each epoch very long. @@ -156,29 +157,29 @@ architecture: encoders: #la_pos | rw_pos la_pos: # Set as null to avoid a pre-nn network encoder_type: "laplacian_pe" - input_keys: ["eigvecs", "eigvals"] + input_keys: ["laplacian_eigvec", "laplacian_eigval"] output_keys: ["feat"] hidden_dim: 64 out_dim: 32 model_type: 'DeepSet' #'Transformer' or 'DeepSet' num_layers: 2 num_layers_post: 1 # Num. 
layers to apply after pooling - dropout: 0.18 + dropout: 0.1 first_normalization: "none" #"batch_norm" or "layer_norm" rw_pos: encoder_type: "mlp" - input_keys: ["rwse"] + input_keys: ["rw_return_probs"] output_keys: ["feat"] hidden_dim: 64 out_dim: 32 num_layers: 2 - dropout: 0.18 + dropout: 0.1 normalization: "layer_norm" #"batch_norm" or "layer_norm" first_normalization: "layer_norm" #"batch_norm" or "layer_norm" gaussian_pos: # 3D_bias encoder_type: "gaussian_kernel" input_keys: ["positions_3d"] - output_keys: ["feat", "graph_gaussian_bias_3d"] + output_keys: ["feat", "nodepair_gaussian_bias_3d"] num_heads: 32 num_layers: 1 #2 embed_dim: 32 @@ -197,7 +198,6 @@ architecture: normalization: "layer_norm" last_normalization: *normalization residual_type: simple - pooling: [sum] virtual_node: 'none' layer_type: 'pyg:gps' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps layer_kwargs: # Parameters for the model itself. You could define dropout_attn: 0.1 @@ -210,7 +210,7 @@ architecture: out_dim_edges: 128 attn_type: "full-attention" # "full-attention", "none" precision: &precision 16 - biased_attention_key: "graph_gaussian_bias_3d" # 3D_bias + biased_attention_key: "nodepair_gaussian_bias_3d" # 3D_bias attn_kwargs: num_heads: 32 droppath_rate_attn: 0.0 @@ -274,10 +274,33 @@ metrics: - name: mae metric: mae_ipu target_nan_mask: null - multitask_handling: flatten + multitask_handling: mean-per-label threshold_kwargs: null - name: pearsonr metric: pearsonr_ipu threshold_kwargs: null target_nan_mask: null multitask_handling: mean-per-label + +trainer: + seed: *seed + logger: + save_dir: logs/PCQMv2 + name: *name + project: PCQMv2_gpspp + #early_stopping: + # monitor: *monitor + # min_delta: 0 + # patience: 10 + # mode: &mode min + model_checkpoint: + dirpath: models_checkpoints/PCMQ4Mv2/ + filename: *name + #monitor: *monitor + #mode: *mode + save_top_k: 1 + every_n_epochs: 100 + trainer: + max_epochs: *max_epochs + min_epochs: 1 + check_val_every_n_epoch: 20 \ No newline at end of file diff --git a/expts/configs/config_mpnn_10M_pcqm4m.yaml b/expts/configs/config_mpnn_10M_pcqm4m.yaml index ffb294cb3..cbe1ded53 100644 --- a/expts/configs/config_mpnn_10M_pcqm4m.yaml +++ b/expts/configs/config_mpnn_10M_pcqm4m.yaml @@ -1,4 +1,4 @@ -# Testing the mpnn only model with the PCQMv2 dataset on IPU. +# MPNN model with the PCQMv2 dataset on IPU. 
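
For reference, the IPU options and dataloader options in this patch combine multiplicatively: under PopTorch, the combined batch prepared per host-side step is micro-batch x device iterations x replication factor x gradient accumulation. A small illustrative calculation with the GPS++ values at this point in the series (treating Lightning's `accumulate_grad_batches` as the PopTorch gradient-accumulation factor, which is an assumption about how graphium wires the two together):

    micro_batch = 16          # batch_size_training
    device_iterations = 20    # deviceIterations(20)
    replication = 16          # replicationFactor(16)
    grad_accum = 4            # trainer.accumulate_grad_batches

    combined_batch = micro_batch * device_iterations * replication * grad_accum
    print(combined_batch)     # 20480 graphs fetched per host-side training step

This is why bumping deviceIterations from 2 to 20 and replicationFactor from 4 to 16 matters for throughput without touching the per-replica micro-batch.
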
constants: name: &name pcqm4mv2_mpnn_4layer seed: &seed 42 @@ -85,7 +85,7 @@ datamodule: # Featurization prepare_dict_or_graph: pyg:graph - featurization_n_jobs: 20 + featurization_n_jobs: 30 featurization_progress: True featurization_backend: "loky" processed_graph_data_path: "../datacache/PCQM4Mv2/" @@ -259,7 +259,7 @@ metrics: - name: mae metric: mae_ipu target_nan_mask: null - multitask_handling: flatten + multitask_handling: mean-per-label threshold_kwargs: null - name: pearsonr metric: pearsonr_ipu From 5f69e792bd0d636216c48aa1ba586534d8204c95 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Tue, 25 Jul 2023 15:07:36 +0000 Subject: [PATCH 04/12] minot update for gcn large --- expts/neurips2023_configs/config_large_gcn.yaml | 1 - expts/neurips2023_configs/debug/config_large_gcn_debug.yaml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/expts/neurips2023_configs/config_large_gcn.yaml b/expts/neurips2023_configs/config_large_gcn.yaml index c972f855a..033b8a5f5 100644 --- a/expts/neurips2023_configs/config_large_gcn.yaml +++ b/expts/neurips2023_configs/config_large_gcn.yaml @@ -171,7 +171,6 @@ datamodule: num_workers: 32 # -1 to use all persistent_workers: True # if use persistent worker at the start of each epoch. # Using persistent_workers false might make the start of each epoch very long. - featurization_backend: "loky" architecture: diff --git a/expts/neurips2023_configs/debug/config_large_gcn_debug.yaml b/expts/neurips2023_configs/debug/config_large_gcn_debug.yaml index db18ee10c..d6e4cb724 100644 --- a/expts/neurips2023_configs/debug/config_large_gcn_debug.yaml +++ b/expts/neurips2023_configs/debug/config_large_gcn_debug.yaml @@ -3,6 +3,7 @@ constants: name: &name neurips2023_large_data_gcn_debug seed: &seed 100 raise_train_error: true # Whether the code should raise an error if it crashes during training + entity: multitask-gnn accelerator: type: ipu # cpu or ipu or gpu @@ -169,7 +170,6 @@ datamodule: num_workers: 30 # -1 to use all persistent_workers: False # if use persistent worker at the start of each epoch. # Using persistent_workers false might make the start of each epoch very long. 
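
On the metric change above (multitask_handling: flatten -> mean-per-label): the usual reading of the two modes is that "flatten" pools every (sample, label) pair into one score, while "mean-per-label" scores each label column separately and averages the results; for the single-label homolumo head the two coincide, so this is a consistency change. A rough torch sketch of the distinction, not graphium's exact implementation:

    import torch

    def mae_flatten(preds: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        # One MAE over all (sample, label) pairs pooled together.
        return (preds - targets).abs().mean()

    def mae_mean_per_label(preds: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        # One MAE per label column, then the average of the per-label scores.
        return (preds - targets).abs().mean(dim=0).mean()
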
- featurization_backend: "loky" architecture: From 043f7ae91f82444c502171bcd736550c3a91c920 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Thu, 27 Jul 2023 08:54:50 +0000 Subject: [PATCH 05/12] add options for throughput optimization --- expts/configs/config_gpspp_10M_pcqm4m.yaml | 11 ++++++----- expts/configs/config_mpnn_10M_pcqm4m.yaml | 9 +++++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/expts/configs/config_gpspp_10M_pcqm4m.yaml b/expts/configs/config_gpspp_10M_pcqm4m.yaml index dad4ac7b2..3bfaf3de2 100644 --- a/expts/configs/config_gpspp_10M_pcqm4m.yaml +++ b/expts/configs/config_gpspp_10M_pcqm4m.yaml @@ -16,12 +16,13 @@ accelerator: max_num_edges_per_graph: 60 ipu_dataloader_inference_opts: mode: async - max_num_nodes_per_graph: 16 # valid max nodes: 51, max_edges: 118 + max_num_nodes_per_graph: 30 # valid max nodes: 51, max_edges: 118 max_num_edges_per_graph: 120 # Data handling-related batch_size_training: 16 batch_size_inference: 16 predictor: + metrics_every_n_train_steps: 1000 optim_kwargs: loss_scaling: 1024 trainer: @@ -78,8 +79,6 @@ datamodule: seed: *seed label_normalization: method: "normal" - min_clipping: [0] - max_clipping: [50] # Featurization prepare_dict_or_graph: pyg:graph @@ -209,7 +208,7 @@ architecture: in_dim_edges: 128 out_dim_edges: 128 attn_type: "full-attention" # "full-attention", "none" - precision: &precision 16 + precision: &precision 16-true biased_attention_key: "nodepair_gaussian_bias_3d" # 3D_bias attn_kwargs: num_heads: 32 @@ -246,7 +245,9 @@ architecture: #Task-specific predictor: metrics_on_progress_bar: - homolumo: ["mae", "pearsonr"] + homolumo: [] + metrics_on_training_set: + homolumo: ["pearsonr"] loss_fun: homolumo: mae_ipu random_seed: *seed diff --git a/expts/configs/config_mpnn_10M_pcqm4m.yaml b/expts/configs/config_mpnn_10M_pcqm4m.yaml index cbe1ded53..85291441e 100644 --- a/expts/configs/config_mpnn_10M_pcqm4m.yaml +++ b/expts/configs/config_mpnn_10M_pcqm4m.yaml @@ -16,12 +16,13 @@ accelerator: max_num_edges_per_graph: 60 ipu_dataloader_inference_opts: mode: async - max_num_nodes_per_graph: 16 # valid max nodes: 51, max_edges: 118 + max_num_nodes_per_graph: 30 # valid max nodes: 51, max_edges: 118 max_num_edges_per_graph: 120 # Data handling-related batch_size_training: 64 batch_size_inference: 16 predictor: + metrics_every_n_train_steps: 1000 optim_kwargs: loss_scaling: 1024 trainer: @@ -80,8 +81,6 @@ datamodule: seed: *seed label_normalization: method: "normal" - min_clipping: [0] - max_clipping: [50] # Featurization prepare_dict_or_graph: pyg:graph @@ -232,7 +231,9 @@ architecture: #Task-specific predictor: metrics_on_progress_bar: - homolumo: ["mae", "pearsonr"] + homolumo: [] + metrics_on_training_set: + homolumo: ["pearsonr"] loss_fun: homolumo: mae_ipu random_seed: *seed From a5f4ebce7c02c81bd9a2a16573957482f6efe676 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Thu, 27 Jul 2023 15:25:15 +0000 Subject: [PATCH 06/12] fix run_validation_test.py after hydra changes --- expts/configs/config_mpnn_10M_pcqm4m.yaml | 2 +- expts/main_run_multitask.py | 1 - expts/run_validation_test.py | 38 +++++++---------------- 3 files changed, 12 insertions(+), 29 deletions(-) diff --git a/expts/configs/config_mpnn_10M_pcqm4m.yaml b/expts/configs/config_mpnn_10M_pcqm4m.yaml index 85291441e..2a4c18b5b 100644 --- a/expts/configs/config_mpnn_10M_pcqm4m.yaml +++ b/expts/configs/config_mpnn_10M_pcqm4m.yaml @@ -12,7 +12,7 @@ accelerator: args: ipu_dataloader_training_opts: mode: async - max_num_nodes_per_graph: 20 # train 
max nodes: 20, max_edges: 54 + max_num_nodes_per_graph: 16 # train max nodes: 20, max_edges: 54 max_num_edges_per_graph: 60 ipu_dataloader_inference_opts: mode: async diff --git a/expts/main_run_multitask.py b/expts/main_run_multitask.py index 64e1c185b..c14670377 100644 --- a/expts/main_run_multitask.py +++ b/expts/main_run_multitask.py @@ -73,7 +73,6 @@ def main(cfg: DictConfig) -> None: save_params_to_wandb(trainer.logger, cfg, predictor, datamodule) # Determine the max num nodes and edges in training and validation - logger.info("About to set the max nodes etc.") predictor.set_max_nodes_edges_per_graph(datamodule, stages=["train", "val"]) # Run the model training diff --git a/expts/run_validation_test.py b/expts/run_validation_test.py index cf6248d24..06804301c 100644 --- a/expts/run_validation_test.py +++ b/expts/run_validation_test.py @@ -4,7 +4,7 @@ from os.path import dirname, abspath import yaml from copy import deepcopy -from omegaconf import DictConfig +from omegaconf import DictConfig, OmegaConf import timeit from loguru import logger from datetime import datetime @@ -20,41 +20,33 @@ load_trainer, save_params_to_wandb, load_accelerator, - load_yaml_config, ) from graphium.utils.safe_run import SafeRun +import hydra # WandB import wandb # Set up the working directory MAIN_DIR = dirname(dirname(abspath(graphium.__file__))) - -# CONFIG_FILE = "expts/configs/config_mpnn_10M_b3lyp.yaml" -# CONFIG_FILE = "expts/configs/config_mpnn_10M_pcqm4m.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_debug.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_large_mpnn.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_large_gcn.yaml" -CONFIG_FILE = "expts/neurips2023_configs/debug/config_large_gcn_debug.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_large_gin.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_large_gine.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_small_gcn.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_large_gcn.yaml" -# CONFIG_FILE = "exptas/neurips2023_configs/config_small_gin.yaml" -# CONFIG_FILE = "expts/neurips2023_configs/config_small_gine.yaml" os.chdir(MAIN_DIR) -def main(cfg: DictConfig, run_name: str = "main", add_date_time: bool = True) -> None: +@hydra.main(version_base=None, config_path="hydra-configs", config_name="main") +def main(cfg: DictConfig) -> None: + cfg = OmegaConf.to_container(cfg, resolve=True) + + run_name: str = "main" + add_date_time: bool = True + st = timeit.default_timer() date_time_suffix = "" if add_date_time: date_time_suffix = datetime.now().strftime("%d.%m.%Y_%H.%M.%S") - cfg = deepcopy(cfg) - wandb.init(project=cfg["constants"]["name"], config=cfg) + wandb.init(entity=cfg["constants"]["entity"], project=cfg["constants"]["name"], config=cfg) # Initialize the accelerator cfg, accelerator_type = load_accelerator(cfg) @@ -110,12 +102,4 @@ def main(cfg: DictConfig, run_name: str = "main", add_date_time: bool = True) -> if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--config", help="Path to the config file", default=None) - - args, unknown_args = parser.parse_known_args() - if args.config is not None: - CONFIG_FILE = args.config - cfg = load_yaml_config(CONFIG_FILE, MAIN_DIR, unknown_args) - - main(cfg) + main() From 06bfe9fcae55641178b044b06d344a981ead3054 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Thu, 27 Jul 2023 16:04:21 +0000 Subject: [PATCH 07/12] cpnvert pcqm4m mpnn to hydra configs --- expts/hydra-configs/accelerator/ipu.yaml | 4 
+- .../dataset/accelerator/pcqm4m_ipu.yaml | 25 ++ expts/hydra-configs/dataset/pcqm4m.yaml | 221 ++++++++++++++++++ .../hydra-configs/experiment/pcqm4m_mpnn.yaml | 13 ++ expts/hydra-configs/main.yaml | 4 +- expts/hydra-configs/model/mpnn.yaml | 16 ++ 6 files changed, 279 insertions(+), 4 deletions(-) create mode 100644 expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml create mode 100644 expts/hydra-configs/dataset/pcqm4m.yaml create mode 100644 expts/hydra-configs/experiment/pcqm4m_mpnn.yaml create mode 100644 expts/hydra-configs/model/mpnn.yaml diff --git a/expts/hydra-configs/accelerator/ipu.yaml b/expts/hydra-configs/accelerator/ipu.yaml index 6e7fc8e06..b7075af97 100644 --- a/expts/hydra-configs/accelerator/ipu.yaml +++ b/expts/hydra-configs/accelerator/ipu.yaml @@ -1,9 +1,9 @@ type: ipu ipu_config: - - deviceIterations(5) # IPU would require large batches to be ready for the model. + - deviceIterations(30) # IPU would require large batches to be ready for the model. - replicationFactor(16) # - enableProfiling("graph_analyser") # The folder where the profile will be stored # - enableExecutableCaching("pop_compiler_cache") - TensorLocations.numIOTiles(128) - - _Popart.set("defaultBufferingDepth", 128) + - _Popart.set("defaultBufferingDepth", 96) - Precision.enableStochasticRounding(True) \ No newline at end of file diff --git a/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml new file mode 100644 index 000000000..ebe372605 --- /dev/null +++ b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml @@ -0,0 +1,25 @@ +# @package _global_ + +datamodule: + args: + ipu_dataloader_training_opts: + mode: async + max_num_nodes_per_graph: 16 # train max nodes: 20, max_edges: 54 + max_num_edges_per_graph: 60 + ipu_dataloader_inference_opts: + mode: async + max_num_nodes_per_graph: 30 # valid max nodes: 51, max_edges: 118 + max_num_edges_per_graph: 120 + # Data handling-related + batch_size_training: 64 + batch_size_inference: 16 + +predictor: + metrics_every_n_train_steps: 1000 + optim_kwargs: + loss_scaling: 1024 + +trainer: + trainer: + precision: 16-true + accumulate_grad_batches: 4 \ No newline at end of file diff --git a/expts/hydra-configs/dataset/pcqm4m.yaml b/expts/hydra-configs/dataset/pcqm4m.yaml new file mode 100644 index 000000000..46f36e00d --- /dev/null +++ b/expts/hydra-configs/dataset/pcqm4m.yaml @@ -0,0 +1,221 @@ +# @package _global_ + +datamodule: + module_type: "MultitaskFromSmilesDataModule" + # module_type: "FakeDataModule" # Option to use generated data + args: # Matches that in the test_multitask_datamodule.py case. 
+ task_specific_args: # To be replaced by a new class "DatasetParams" + homolumo: + df: null + task_level: "graph" + df_path: graphium/data/PCQM4M/pcqm4mv2.csv + # wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv + # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly + smiles_col: "cxsmiles" + label_cols: ["homo_lumo_gap"] + # sample_size: 8000 # use sample_size for test + splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` + split_names: ["train", "valid", "test-dev"] + # graphium/data/PCQM4Mv2/split_dict.pt + # graphium/data/PCQM4Mv2/pcqm4m_split.csv + # split_val: 0.1 + # split_test: 0.1 + seed: ${constants.seed} + label_normalization: + method: "normal" + + # Featurization + prepare_dict_or_graph: pyg:graph + featurization_n_jobs: 30 + featurization_progress: True + featurization_backend: "loky" + processed_graph_data_path: "../datacache/PCQM4Mv2/" + featurization: + # OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence), + # 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring', + # 'num_chiral_centers (not included yet)'] + atom_property_list_onehot: [atomic-number, group, period, total-valence] + atom_property_list_float: [degree, formal-charge, radical-electron, aromatic, in-ring] + # OGB: ['possible_bond_type', 'possible_bond_stereo', 'possible_is_in_ring'] + edge_property_list: [bond-type-onehot, stereo, in-ring] + add_self_loop: False + explicit_H: False # if H is included + use_bonds_weights: False + pos_encoding_as_features: # encoder dropout 0.18 + pos_types: + lap_eigvec: + pos_level: node + pos_type: laplacian_eigvec + num_pos: 8 + normalization: "none" # nomrlization already applied on the eigen vectors + disconnected_comp: True # if eigen values/vector for disconnected graph are included + lap_eigval: + pos_level: node + pos_type: laplacian_eigval + num_pos: 8 + normalization: "none" # nomrlization already applied on the eigen vectors + disconnected_comp: True # if eigen values/vector for disconnected graph are included + rw_pos: # use same name as pe_encoder + pos_level: node + pos_type: rw_return_probs + ksteps: 16 + + # cache_data_path: . + num_workers: 30 # -1 to use all + persistent_workers: False # if use persistent worker at the start of each epoch. + # Using persistent_workers false might make the start of each epoch very long. 
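
The rw_pos block above requests random-walk return probabilities as a node-level positional encoding. Assuming the standard definition (the diagonal of the k-step random-walk transition matrix, for k = 1..ksteps), a minimal numpy sketch of the quantity the `rw_return_probs` encoder consumes — graphium's own featurizer is the authoritative implementation:

    import numpy as np

    def rw_return_probs(adj: np.ndarray, ksteps: int = 16) -> np.ndarray:
        # Probability that a k-step random walk returns to its starting node.
        deg = adj.sum(axis=1, keepdims=True)
        P = adj / np.clip(deg, 1, None)      # row-stochastic transition matrix
        probs, Pk = [], np.eye(adj.shape[0])
        for _ in range(ksteps):
            Pk = Pk @ P
            probs.append(np.diag(Pk).copy())
        return np.stack(probs, axis=1)       # shape [num_nodes, ksteps]
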
+ +architecture: + model_type: FullGraphMultiTaskNetwork + mup_base_path: null + pre_nn: # Set as null to avoid a pre-nn network + out_dim: 256 + hidden_dims: 1024 + depth: 2 + activation: relu + last_activation: none + dropout: &dropout 0.18 + normalization: &normalization layer_norm + last_normalization: *normalization + residual_type: none + + pre_nn_edges: # Set as null to avoid a pre-nn network + out_dim: 128 + hidden_dims: 512 + depth: 2 + activation: relu + last_activation: none + dropout: *dropout + normalization: *normalization + last_normalization: *normalization + residual_type: none + + pe_encoders: + out_dim: 32 + pool: "sum" #"mean" "max" + last_norm: None #"batch_norm", "layer_norm" + encoders: #la_pos | rw_pos + la_pos: # Set as null to avoid a pre-nn network + encoder_type: "laplacian_pe" + input_keys: ["laplacian_eigvec", "laplacian_eigval"] + output_keys: ["feat"] + hidden_dim: 64 + out_dim: 32 + model_type: 'DeepSet' #'Transformer' or 'DeepSet' + num_layers: 2 + num_layers_post: 1 # Num. layers to apply after pooling + dropout: 0.1 + first_normalization: "none" #"batch_norm" or "layer_norm" + rw_pos: + encoder_type: "mlp" + input_keys: ["rw_return_probs"] + output_keys: ["feat"] + hidden_dim: 64 + out_dim: 32 + num_layers: 2 + dropout: 0.1 + normalization: "layer_norm" #"batch_norm" or "layer_norm" + first_normalization: "layer_norm" #"batch_norm" or "layer_norm" + + + + gnn: # Set as null to avoid a post-nn network + out_dim: 256 + hidden_dims: 256 + depth: 4 + activation: gelu + last_activation: none + dropout: 0.1 + normalization: "layer_norm" + last_normalization: *normalization + residual_type: simple + virtual_node: 'none' + + graph_output_nn: + graph: + pooling: [sum] + out_dim: 256 + hidden_dims: 256 + depth: 1 + activation: relu + last_activation: none + dropout: *dropout + normalization: *normalization + last_normalization: "none" + residual_type: none + + task_heads: + homolumo: + task_level: graph + out_dim: 1 + hidden_dims: 256 + depth: 2 # Not needed if we have hidden_dims + activation: relu + last_activation: none + dropout: *dropout + normalization: *normalization + last_normalization: "none" + residual_type: none + +#Task-specific +predictor: + metrics_on_progress_bar: + homolumo: [] + metrics_on_training_set: + homolumo: ["pearsonr"] + loss_fun: + homolumo: mae_ipu + random_seed: ${constants.seed} + optim_kwargs: + lr: 4.e-4 # warmup can be scheduled using torch_scheduler_kwargs + # weight_decay: 1.e-7 + torch_scheduler_kwargs: + module_type: WarmUpLinearLR + max_num_epochs: &max_epochs 100 + warmup_epochs: 10 + verbose: False + scheduler_kwargs: + # monitor: &monitor homolumo/mae/train + # mode: min + # frequency: 1 + target_nan_mask: null # null: no mask, 0: 0 mask, ignore: ignore nan values from loss + flag_kwargs: + n_steps: 0 # 1 + alpha: 0.0 # 0.01 + +# Task-specific +metrics: + homolumo: + - name: mae + metric: mae_ipu + target_nan_mask: null + multitask_handling: mean-per-label + threshold_kwargs: null + - name: pearsonr + metric: pearsonr_ipu + threshold_kwargs: null + target_nan_mask: null + multitask_handling: mean-per-label + +trainer: + seed: ${constants.seed} + logger: + save_dir: logs/PCQMv2 + name: ${constants.name} + project: PCQMv2_mpnn + #early_stopping: + # monitor: *monitor + # min_delta: 0 + # patience: 10 + # mode: &mode min + model_checkpoint: + dirpath: models_checkpoints/PCMQ4Mv2/ + filename: ${constants.name} + #monitor: *monitor + #mode: *mode + save_top_k: 1 + every_n_epochs: 100 + trainer: + max_epochs: *max_epochs + 
min_epochs: 1 + check_val_every_n_epoch: 20 \ No newline at end of file diff --git a/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml new file mode 100644 index 000000000..037f5e967 --- /dev/null +++ b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml @@ -0,0 +1,13 @@ +# @package _global_ + +# MPNN model with the PCQMv2 dataset on IPU. +constants: + name: pcqm4mv2_mpnn_4layer + entity: "multitask-gnn" + seed: 42 + max_epochs: 100 + raise_train_error: true # Whether the code should raise an error if it crashes during training + +trainer: + model_checkpoint: + dirpath: models_checkpoints/PCMQ4Mv2/ \ No newline at end of file diff --git a/expts/hydra-configs/main.yaml b/expts/hydra-configs/main.yaml index 198bccb0c..903b7fe1d 100644 --- a/expts/hydra-configs/main.yaml +++ b/expts/hydra-configs/main.yaml @@ -1,7 +1,7 @@ defaults: - accelerator: ipu - - dataset: toymix - - model: gcn + - dataset: pcqm4m + - model: mpnn # Specializations - experiment: ${dataset}_${model} diff --git a/expts/hydra-configs/model/mpnn.yaml b/expts/hydra-configs/model/mpnn.yaml new file mode 100644 index 000000000..0d322ade3 --- /dev/null +++ b/expts/hydra-configs/model/mpnn.yaml @@ -0,0 +1,16 @@ +# @package _global_ + +architecture: + gnn: + layer_type: 'pyg:gps' + layer_kwargs: # Parameters for the model itself. You could define dropout_attn: 0.1 + node_residual: false + mpnn_type: 'pyg:mpnnplus' + mpnn_kwargs: + in_dim: 256 + out_dim: 256 + in_dim_edges: 128 + out_dim_edges: 128 + attn_type: "none" # "full-attention", "none" + # biased_attention: false + attn_kwargs: null \ No newline at end of file From e9c832c622290e0124cb31460466522dd9c96e11 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Thu, 27 Jul 2023 16:24:26 +0000 Subject: [PATCH 08/12] add gpspp pcqm4mv2 hydra configs --- expts/configs/config_gpspp_10M_pcqm4m.yaml | 6 +-- expts/hydra-configs/accelerator/ipu.yaml | 9 ++++- expts/hydra-configs/dataset/pcqm4m.yaml | 2 +- .../experiment/pcqm4m_gpspp.yaml | 13 +++++++ .../hydra-configs/experiment/pcqm4m_mpnn.yaml | 4 +- expts/hydra-configs/main.yaml | 2 +- expts/hydra-configs/model/gpspp.yaml | 38 +++++++++++++++++++ 7 files changed, 66 insertions(+), 8 deletions(-) create mode 100644 expts/hydra-configs/experiment/pcqm4m_gpspp.yaml create mode 100644 expts/hydra-configs/model/gpspp.yaml diff --git a/expts/configs/config_gpspp_10M_pcqm4m.yaml b/expts/configs/config_gpspp_10M_pcqm4m.yaml index 3bfaf3de2..62abd3a4a 100644 --- a/expts/configs/config_gpspp_10M_pcqm4m.yaml +++ b/expts/configs/config_gpspp_10M_pcqm4m.yaml @@ -92,7 +92,7 @@ datamodule: # 'num_chiral_centers (not included yet)'] atom_property_list_onehot: [atomic-number, group, period, total-valence] atom_property_list_float: [degree, formal-charge, radical-electron, aromatic, in-ring] - conformer_property_list: [positions_3d] # 3D_bias + conformer_property_list: [positions_3d] # OGB: ['possible_bond_type', 'possible_bond_stereo', 'possible_is_in_ring'] edge_property_list: [bond-type-onehot, stereo, in-ring] add_self_loop: False @@ -175,7 +175,7 @@ architecture: dropout: 0.1 normalization: "layer_norm" #"batch_norm" or "layer_norm" first_normalization: "layer_norm" #"batch_norm" or "layer_norm" - gaussian_pos: # 3D_bias + gaussian_pos: encoder_type: "gaussian_kernel" input_keys: ["positions_3d"] output_keys: ["feat", "nodepair_gaussian_bias_3d"] @@ -209,7 +209,7 @@ architecture: out_dim_edges: 128 attn_type: "full-attention" # "full-attention", "none" precision: &precision 16-true - 
biased_attention_key: "nodepair_gaussian_bias_3d" # 3D_bias + biased_attention_key: "nodepair_gaussian_bias_3d" attn_kwargs: num_heads: 32 droppath_rate_attn: 0.0 diff --git a/expts/hydra-configs/accelerator/ipu.yaml b/expts/hydra-configs/accelerator/ipu.yaml index b7075af97..372297e4c 100644 --- a/expts/hydra-configs/accelerator/ipu.yaml +++ b/expts/hydra-configs/accelerator/ipu.yaml @@ -6,4 +6,11 @@ ipu_config: # - enableExecutableCaching("pop_compiler_cache") - TensorLocations.numIOTiles(128) - _Popart.set("defaultBufferingDepth", 96) - - Precision.enableStochasticRounding(True) \ No newline at end of file + - Precision.enableStochasticRounding(True) + +ipu_inference_config: + # set device iteration and replication factor to 1 during inference + # gradient accumulation was set to 1 in the code + - deviceIterations(1) + - replicationFactor(1) + - Precision.enableStochasticRounding(False) \ No newline at end of file diff --git a/expts/hydra-configs/dataset/pcqm4m.yaml b/expts/hydra-configs/dataset/pcqm4m.yaml index 46f36e00d..41194ee2f 100644 --- a/expts/hydra-configs/dataset/pcqm4m.yaml +++ b/expts/hydra-configs/dataset/pcqm4m.yaml @@ -13,7 +13,7 @@ datamodule: # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly smiles_col: "cxsmiles" label_cols: ["homo_lumo_gap"] - # sample_size: 8000 # use sample_size for test + sample_size: 8000 # use sample_size for test splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` split_names: ["train", "valid", "test-dev"] # graphium/data/PCQM4Mv2/split_dict.pt diff --git a/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml b/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml new file mode 100644 index 000000000..d0e4e3a96 --- /dev/null +++ b/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml @@ -0,0 +1,13 @@ +# @package _global_ + +# GPS++ model with the PCQMv2 dataset. +constants: + name: pcqm4mv2_gpspp_4layer + entity: "multitask-gnn" + seed: 42 + max_epochs: 100 + raise_train_error: true # Whether the code should raise an error if it crashes during training + +trainer: + model_checkpoint: + dirpath: models_checkpoints/PCMQ4Mv2/gpspp/ \ No newline at end of file diff --git a/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml index 037f5e967..28eda3bdf 100644 --- a/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml +++ b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml @@ -1,6 +1,6 @@ # @package _global_ -# MPNN model with the PCQMv2 dataset on IPU. +# MPNN model with the PCQMv2 dataset. 
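
The hydra-configs introduced in PATCH 07 and extended here compose through the defaults list in expts/hydra-configs/main.yaml: each group file begins with `# @package _global_`, so its contents are merged at the config root, and the `experiment: ${dataset}_${model}` entry resolves to the matching specialization (pcqm4m_mpnn or pcqm4m_gpspp). A minimal, illustrative way to inspect the composed result with Hydra's compose API (run from the expts/ directory; not part of the patch):

    from hydra import initialize, compose
    from omegaconf import OmegaConf

    # Select the GPS++ model; dataset=pcqm4m and accelerator=ipu are the defaults.
    with initialize(version_base=None, config_path="hydra-configs"):
        cfg = compose(config_name="main", overrides=["model=gpspp"])

    print(OmegaConf.to_yaml(cfg.constants))  # name: pcqm4mv2_gpspp_4layer, seed: 42, ...
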
constants: name: pcqm4mv2_mpnn_4layer entity: "multitask-gnn" @@ -10,4 +10,4 @@ constants: trainer: model_checkpoint: - dirpath: models_checkpoints/PCMQ4Mv2/ \ No newline at end of file + dirpath: models_checkpoints/PCMQ4Mv2/mpnn/ \ No newline at end of file diff --git a/expts/hydra-configs/main.yaml b/expts/hydra-configs/main.yaml index 903b7fe1d..e5a78fdfc 100644 --- a/expts/hydra-configs/main.yaml +++ b/expts/hydra-configs/main.yaml @@ -1,7 +1,7 @@ defaults: - accelerator: ipu - dataset: pcqm4m - - model: mpnn + - model: gpspp # Specializations - experiment: ${dataset}_${model} diff --git a/expts/hydra-configs/model/gpspp.yaml b/expts/hydra-configs/model/gpspp.yaml new file mode 100644 index 000000000..0b231fcf1 --- /dev/null +++ b/expts/hydra-configs/model/gpspp.yaml @@ -0,0 +1,38 @@ +# @package _global_ + +architecture: + pe_encoders: + encoders: + gaussian_pos: + encoder_type: "gaussian_kernel" + input_keys: ["positions_3d"] + output_keys: ["feat", "nodepair_gaussian_bias_3d"] + num_heads: 32 + num_layers: 1 #2 + embed_dim: 32 + out_dim: 32 # need num of gaussian kernels 128 + # but currently it checks pe_out_dim == pe_out_dim in encoder_manager.py, line 128 + use_input_keys_prefix: False + + gnn: + layer_type: 'pyg:gps' + layer_kwargs: # Parameters for the model itself. You could define dropout_attn: 0.1 + node_residual: false + mpnn_type: 'pyg:mpnnplus' + mpnn_kwargs: + in_dim: 256 + out_dim: 256 + in_dim_edges: 128 + out_dim_edges: 128 + attn_type: "full-attention" # "full-attention", "none" + precision: &precision 16-true + biased_attention_key: "nodepair_gaussian_bias_3d" # 3D_bias + attn_kwargs: + num_heads: 32 + droppath_rate_attn: 0.0 + droppath_rate_ffn: 0.0 + +datamodule: + args: # Matches that in the test_multitask_datamodule.py case. + featurization: + conformer_property_list: [positions_3d] From 1c933b5385c0fef06579764a55973126f91229e4 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Thu, 27 Jul 2023 16:46:00 +0000 Subject: [PATCH 09/12] remove original configs --- expts/configs/config_gpspp_10M_pcqm4m.yaml | 307 --------------------- expts/configs/config_mpnn_10M_pcqm4m.yaml | 292 -------------------- expts/hydra-configs/main.yaml | 2 +- 3 files changed, 1 insertion(+), 600 deletions(-) delete mode 100644 expts/configs/config_gpspp_10M_pcqm4m.yaml delete mode 100644 expts/configs/config_mpnn_10M_pcqm4m.yaml diff --git a/expts/configs/config_gpspp_10M_pcqm4m.yaml b/expts/configs/config_gpspp_10M_pcqm4m.yaml deleted file mode 100644 index 62abd3a4a..000000000 --- a/expts/configs/config_gpspp_10M_pcqm4m.yaml +++ /dev/null @@ -1,307 +0,0 @@ -# GPS++ model with the PCQMv2 dataset on IPU. 
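
The gaussian_pos encoder in model/gpspp.yaml above turns the `positions_3d` conformer feature into per-node-pair attention biases (`nodepair_gaussian_bias_3d`). The underlying idea, in the Gaussian-kernel family used by GPS++-style models, is to expand each pairwise distance onto a set of Gaussian basis functions; a rough numpy sketch of that expansion (the learned means, widths and the projection to one bias per attention head live in the encoder itself):

    import numpy as np

    def gaussian_distance_expansion(pos: np.ndarray, num_kernels: int = 32, max_dist: float = 10.0) -> np.ndarray:
        # pos: [num_nodes, 3] conformer coordinates.
        dist = np.linalg.norm(pos[:, None, :] - pos[None, :, :], axis=-1)    # [N, N]
        mu = np.linspace(0.0, max_dist, num_kernels)                         # kernel centres
        sigma = max_dist / num_kernels                                       # fixed width for the sketch
        return np.exp(-0.5 * ((dist[..., None] - mu) / sigma) ** 2)          # [N, N, num_kernels]
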
-constants: - name: &name pcqm4mv2_gpspp_4layer - seed: &seed 42 - raise_train_error: true # Whether the code should raise an error if it crashes during training - entity: multitask-gnn - -accelerator: - type: ipu # cpu or ipu or gpu - config_override: - datamodule: - args: - ipu_dataloader_training_opts: - mode: async - max_num_nodes_per_graph: 20 # train max nodes: 20, max_edges: 54 - max_num_edges_per_graph: 60 - ipu_dataloader_inference_opts: - mode: async - max_num_nodes_per_graph: 30 # valid max nodes: 51, max_edges: 118 - max_num_edges_per_graph: 120 - # Data handling-related - batch_size_training: 16 - batch_size_inference: 16 - predictor: - metrics_every_n_train_steps: 1000 - optim_kwargs: - loss_scaling: 1024 - trainer: - trainer: - precision: 16-true - accumulate_grad_batches: 4 - - ipu_config: - - deviceIterations(20) # IPU would require large batches to be ready for the model. - - replicationFactor(16) - # - enableProfiling("graph_analyser") # The folder where the profile will be stored - # - enableExecutableCaching("pop_compiler_cache") - - TensorLocations.numIOTiles(128) - - _Popart.set("defaultBufferingDepth", 128) - - Precision.enableStochasticRounding(True) - - ipu_inference_config: - # set device iteration and replication factor to 1 during inference - # gradient accumulation was set to 1 in the code - - deviceIterations(1) - - replicationFactor(1) - - Precision.enableStochasticRounding(False) - -# accelerator: -# type: cpu # cpu or ipu or gpu -# config_override: -# args: -# datamodule: -# batch_size_training: 256 -# batch_size_inference: 64 -# trainer: -# trainer: -# precision: 32 -# accumulate_grad_batches: 1 - -datamodule: - module_type: "MultitaskFromSmilesDataModule" - # module_type: "FakeDataModule" # Option to use generated data - args: # Matches that in the test_multitask_datamodule.py case. 
- task_specific_args: # To be replaced by a new class "DatasetParams" - homolumo: - df: null - task_level: "graph" - df_path: graphium/data/PCQM4M/pcqm4mv2.csv #graphium/data/PCQM4Mv2/pcqm4mv2.csv - # wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv - # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly - smiles_col: "cxsmiles" - label_cols: ["homo_lumo_gap"] - # sample_size: 8000 # use sample_size for test - splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` - # graphium/data/PCQM4Mv2/split_dict.pt - # graphium/data/PCQM4Mv2/pcqm4m_split.csv - split_names: ["train", "valid", "test-dev"] - seed: *seed - label_normalization: - method: "normal" - - # Featurization - prepare_dict_or_graph: pyg:graph - featurization_n_jobs: 30 - featurization_progress: True - featurization_backend: "loky" - processed_graph_data_path: "../datacache/PCQM4Mv2/" - featurization: - # OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence), - # 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring', - # 'num_chiral_centers (not included yet)'] - atom_property_list_onehot: [atomic-number, group, period, total-valence] - atom_property_list_float: [degree, formal-charge, radical-electron, aromatic, in-ring] - conformer_property_list: [positions_3d] - # OGB: ['possible_bond_type', 'possible_bond_stereo', 'possible_is_in_ring'] - edge_property_list: [bond-type-onehot, stereo, in-ring] - add_self_loop: False - explicit_H: False # if H is included - use_bonds_weights: False - pos_encoding_as_features: # encoder dropout 0.18 - pos_types: - lap_eigvec: - pos_level: node - pos_type: laplacian_eigvec - num_pos: 8 - normalization: "none" # nomrlization already applied on the eigen vectors - disconnected_comp: True # if eigen values/vector for disconnected graph are included - lap_eigval: - pos_level: node - pos_type: laplacian_eigval - num_pos: 8 - normalization: "none" # nomrlization already applied on the eigen vectors - disconnected_comp: True # if eigen values/vector for disconnected graph are included - rw_pos: # use same name as pe_encoder - pos_level: node - pos_type: rw_return_probs - ksteps: 16 - - - # cache_data_path: . - num_workers: 30 # -1 to use all - persistent_workers: False # if use persistent worker at the start of each epoch. - # Using persistent_workers false might make the start of each epoch very long. 
- - -architecture: - model_type: FullGraphMultiTaskNetwork - mup_base_path: null - pre_nn: # Set as null to avoid a pre-nn network - out_dim: 256 - hidden_dims: 1024 - depth: 2 - activation: relu - last_activation: none - dropout: &dropout 0.18 - normalization: &normalization layer_norm - last_normalization: *normalization - residual_type: none - - pre_nn_edges: # Set as null to avoid a pre-nn network - out_dim: 128 - hidden_dims: 512 - depth: 2 - activation: relu - last_activation: none - dropout: *dropout - normalization: *normalization - last_normalization: *normalization - residual_type: none - - pe_encoders: - out_dim: 32 - pool: "sum" #"mean" "max" - last_norm: None #"batch_norm", "layer_norm" - encoders: #la_pos | rw_pos - la_pos: # Set as null to avoid a pre-nn network - encoder_type: "laplacian_pe" - input_keys: ["laplacian_eigvec", "laplacian_eigval"] - output_keys: ["feat"] - hidden_dim: 64 - out_dim: 32 - model_type: 'DeepSet' #'Transformer' or 'DeepSet' - num_layers: 2 - num_layers_post: 1 # Num. layers to apply after pooling - dropout: 0.1 - first_normalization: "none" #"batch_norm" or "layer_norm" - rw_pos: - encoder_type: "mlp" - input_keys: ["rw_return_probs"] - output_keys: ["feat"] - hidden_dim: 64 - out_dim: 32 - num_layers: 2 - dropout: 0.1 - normalization: "layer_norm" #"batch_norm" or "layer_norm" - first_normalization: "layer_norm" #"batch_norm" or "layer_norm" - gaussian_pos: - encoder_type: "gaussian_kernel" - input_keys: ["positions_3d"] - output_keys: ["feat", "nodepair_gaussian_bias_3d"] - num_heads: 32 - num_layers: 1 #2 - embed_dim: 32 - out_dim: 32 # need num of gaussian kernels 128 - # but currently it checks pe_out_dim == pe_out_dim in encoder_manager.py, line 128 - use_input_keys_prefix: False - - - gnn: # Set as null to avoid a post-nn network - out_dim: 256 - hidden_dims: 256 - depth: 4 - activation: gelu - last_activation: none - dropout: 0.1 - normalization: "layer_norm" - last_normalization: *normalization - residual_type: simple - virtual_node: 'none' - layer_type: 'pyg:gps' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps - layer_kwargs: # Parameters for the model itself. 
You could define dropout_attn: 0.1 - node_residual: false - mpnn_type: 'pyg:mpnnplus' - mpnn_kwargs: - in_dim: 256 - out_dim: 256 - in_dim_edges: 128 - out_dim_edges: 128 - attn_type: "full-attention" # "full-attention", "none" - precision: &precision 16-true - biased_attention_key: "nodepair_gaussian_bias_3d" - attn_kwargs: - num_heads: 32 - droppath_rate_attn: 0.0 - droppath_rate_ffn: 0.0 - - - graph_output_nn: - graph: - pooling: [sum] - out_dim: 256 - hidden_dims: 256 - depth: 1 - activation: relu - last_activation: none - dropout: *dropout - normalization: *normalization - last_normalization: "none" - residual_type: none - - task_heads: - homolumo: - task_level: graph - out_dim: 1 - hidden_dims: 256 - depth: 2 # Not needed if we have hidden_dims - activation: relu - last_activation: none - dropout: *dropout - normalization: *normalization - last_normalization: "none" - residual_type: none - -#Task-specific -predictor: - metrics_on_progress_bar: - homolumo: [] - metrics_on_training_set: - homolumo: ["pearsonr"] - loss_fun: - homolumo: mae_ipu - random_seed: *seed - optim_kwargs: - lr: 4.e-4 # warmup can be scheduled using torch_scheduler_kwargs - # weight_decay: 1.e-7 - # loss_scaling: 1024 - torch_scheduler_kwargs: - module_type: WarmUpLinearLR - max_num_epochs: &max_epochs 100 - warmup_epochs: 10 - verbose: False - scheduler_kwargs: - # monitor: &monitor homolumo/mae/train - # mode: min - # frequency: 1 - target_nan_mask: null # null: no mask, 0: 0 mask, ignore: ignore nan values from loss - flag_kwargs: - n_steps: 0 # 1 - alpha: 0.0 # 0.01 - -# Task-specific -metrics: - homolumo: - - name: mae - metric: mae_ipu - target_nan_mask: null - multitask_handling: mean-per-label - threshold_kwargs: null - - name: pearsonr - metric: pearsonr_ipu - threshold_kwargs: null - target_nan_mask: null - multitask_handling: mean-per-label - -trainer: - seed: *seed - logger: - save_dir: logs/PCQMv2 - name: *name - project: PCQMv2_gpspp - #early_stopping: - # monitor: *monitor - # min_delta: 0 - # patience: 10 - # mode: &mode min - model_checkpoint: - dirpath: models_checkpoints/PCMQ4Mv2/ - filename: *name - #monitor: *monitor - #mode: *mode - save_top_k: 1 - every_n_epochs: 100 - trainer: - max_epochs: *max_epochs - min_epochs: 1 - check_val_every_n_epoch: 20 \ No newline at end of file diff --git a/expts/configs/config_mpnn_10M_pcqm4m.yaml b/expts/configs/config_mpnn_10M_pcqm4m.yaml deleted file mode 100644 index 2a4c18b5b..000000000 --- a/expts/configs/config_mpnn_10M_pcqm4m.yaml +++ /dev/null @@ -1,292 +0,0 @@ -# MPNN model with the PCQMv2 dataset on IPU. -constants: - name: &name pcqm4mv2_mpnn_4layer - seed: &seed 42 - raise_train_error: true # Whether the code should raise an error if it crashes during training - entity: multitask-gnn - -accelerator: - type: ipu # cpu or ipu or gpu - config_override: - datamodule: - args: - ipu_dataloader_training_opts: - mode: async - max_num_nodes_per_graph: 16 # train max nodes: 20, max_edges: 54 - max_num_edges_per_graph: 60 - ipu_dataloader_inference_opts: - mode: async - max_num_nodes_per_graph: 30 # valid max nodes: 51, max_edges: 118 - max_num_edges_per_graph: 120 - # Data handling-related - batch_size_training: 64 - batch_size_inference: 16 - predictor: - metrics_every_n_train_steps: 1000 - optim_kwargs: - loss_scaling: 1024 - trainer: - trainer: - precision: 16-true - accumulate_grad_batches: 4 - - ipu_config: - - deviceIterations(20) # IPU would require large batches to be ready for the model. 
- - replicationFactor(16) - # - enableProfiling("graph_analyser") # The folder where the profile will be stored - # - enableExecutableCaching("pop_compiler_cache") - - TensorLocations.numIOTiles(128) - - _Popart.set("defaultBufferingDepth", 128) - - Precision.enableStochasticRounding(True) - - ipu_inference_config: - # set device iteration and replication factor to 1 during inference - # gradient accumulation was set to 1 in the code - - deviceIterations(1) - - replicationFactor(1) - - Precision.enableStochasticRounding(False) - -# accelerator: -# type: cpu # cpu or ipu or gpu -# config_override: -# args: -# datamodule: -# batch_size_training: 256 -# batch_size_inference: 64 -# trainer: -# trainer: -# precision: 32 -# accumulate_grad_batches: 1 - -datamodule: - module_type: "MultitaskFromSmilesDataModule" - # module_type: "FakeDataModule" # Option to use generated data - args: # Matches that in the test_multitask_datamodule.py case. - task_specific_args: # To be replaced by a new class "DatasetParams" - homolumo: - df: null - task_level: "graph" - df_path: graphium/data/PCQM4M/pcqm4mv2.csv - # wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv - # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly - smiles_col: "cxsmiles" - label_cols: ["homo_lumo_gap"] - # sample_size: 8000 # use sample_size for test - splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` - split_names: ["train", "valid", "test-dev"] - # graphium/data/PCQM4Mv2/split_dict.pt - # graphium/data/PCQM4Mv2/pcqm4m_split.csv - # split_val: 0.1 - # split_test: 0.1 - seed: *seed - label_normalization: - method: "normal" - - # Featurization - prepare_dict_or_graph: pyg:graph - featurization_n_jobs: 30 - featurization_progress: True - featurization_backend: "loky" - processed_graph_data_path: "../datacache/PCQM4Mv2/" - featurization: - # OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence), - # 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring', - # 'num_chiral_centers (not included yet)'] - atom_property_list_onehot: [atomic-number, group, period, total-valence] - atom_property_list_float: [degree, formal-charge, radical-electron, aromatic, in-ring] - # OGB: ['possible_bond_type', 'possible_bond_stereo', 'possible_is_in_ring'] - edge_property_list: [bond-type-onehot, stereo, in-ring] - add_self_loop: False - explicit_H: False # if H is included - use_bonds_weights: False - pos_encoding_as_features: # encoder dropout 0.18 - pos_types: - lap_eigvec: - pos_level: node - pos_type: laplacian_eigvec - num_pos: 8 - normalization: "none" # nomrlization already applied on the eigen vectors - disconnected_comp: True # if eigen values/vector for disconnected graph are included - lap_eigval: - pos_level: node - pos_type: laplacian_eigval - num_pos: 8 - normalization: "none" # nomrlization already applied on the eigen vectors - disconnected_comp: True # if eigen values/vector for disconnected graph are included - rw_pos: # use same name as pe_encoder - pos_level: node - pos_type: rw_return_probs - ksteps: 16 - - # cache_data_path: . - num_workers: 30 # -1 to use all - persistent_workers: False # if use persistent worker at the start of each epoch. - # Using persistent_workers false might make the start of each epoch very long. 
- -architecture: - model_type: FullGraphMultiTaskNetwork - mup_base_path: null - pre_nn: # Set as null to avoid a pre-nn network - out_dim: 256 - hidden_dims: 1024 - depth: 2 - activation: relu - last_activation: none - dropout: &dropout 0.18 - normalization: &normalization layer_norm - last_normalization: *normalization - residual_type: none - - pre_nn_edges: # Set as null to avoid a pre-nn network - out_dim: 128 - hidden_dims: 512 - depth: 2 - activation: relu - last_activation: none - dropout: *dropout - normalization: *normalization - last_normalization: *normalization - residual_type: none - - pe_encoders: - out_dim: 32 - pool: "sum" #"mean" "max" - last_norm: None #"batch_norm", "layer_norm" - encoders: #la_pos | rw_pos - la_pos: # Set as null to avoid a pre-nn network - encoder_type: "laplacian_pe" - input_keys: ["laplacian_eigvec", "laplacian_eigval"] - output_keys: ["feat"] - hidden_dim: 64 - out_dim: 32 - model_type: 'DeepSet' #'Transformer' or 'DeepSet' - num_layers: 2 - num_layers_post: 1 # Num. layers to apply after pooling - dropout: 0.1 - first_normalization: "none" #"batch_norm" or "layer_norm" - rw_pos: - encoder_type: "mlp" - input_keys: ["rw_return_probs"] - output_keys: ["feat"] - hidden_dim: 64 - out_dim: 32 - num_layers: 2 - dropout: 0.1 - normalization: "layer_norm" #"batch_norm" or "layer_norm" - first_normalization: "layer_norm" #"batch_norm" or "layer_norm" - - - - gnn: # Set as null to avoid a post-nn network - out_dim: 256 - hidden_dims: 256 - depth: 4 - activation: gelu - last_activation: none - dropout: 0.1 - normalization: "layer_norm" - last_normalization: *normalization - residual_type: simple - virtual_node: 'none' - layer_type: 'pyg:gps' #pyg:gine #'pyg:gps' # pyg:gated-gcn, pyg:gine,pyg:gps - layer_kwargs: # Parameters for the model itself. 
You could define dropout_attn: 0.1 - node_residual: false - mpnn_type: 'pyg:mpnnplus' - mpnn_kwargs: - in_dim: 256 - out_dim: 256 - in_dim_edges: 128 - out_dim_edges: 128 - attn_type: "none" # "full-attention", "none" - # biased_attention: false - attn_kwargs: null - - - graph_output_nn: - graph: - pooling: [sum] - out_dim: 256 - hidden_dims: 256 - depth: 1 - activation: relu - last_activation: none - dropout: *dropout - normalization: *normalization - last_normalization: "none" - residual_type: none - - task_heads: - homolumo: - task_level: graph - out_dim: 1 - hidden_dims: 256 - depth: 2 # Not needed if we have hidden_dims - activation: relu - last_activation: none - dropout: *dropout - normalization: *normalization - last_normalization: "none" - residual_type: none - -#Task-specific -predictor: - metrics_on_progress_bar: - homolumo: [] - metrics_on_training_set: - homolumo: ["pearsonr"] - loss_fun: - homolumo: mae_ipu - random_seed: *seed - optim_kwargs: - lr: 4.e-4 # warmup can be scheduled using torch_scheduler_kwargs - # weight_decay: 1.e-7 - torch_scheduler_kwargs: - module_type: WarmUpLinearLR - max_num_epochs: &max_epochs 100 - warmup_epochs: 10 - verbose: False - scheduler_kwargs: - # monitor: &monitor homolumo/mae/train - # mode: min - # frequency: 1 - target_nan_mask: null # null: no mask, 0: 0 mask, ignore: ignore nan values from loss - flag_kwargs: - n_steps: 0 # 1 - alpha: 0.0 # 0.01 - -# Task-specific -metrics: - homolumo: - - name: mae - metric: mae_ipu - target_nan_mask: null - multitask_handling: mean-per-label - threshold_kwargs: null - - name: pearsonr - metric: pearsonr_ipu - threshold_kwargs: null - target_nan_mask: null - multitask_handling: mean-per-label - -trainer: - seed: *seed - logger: - save_dir: logs/PCQMv2 - name: *name - project: PCQMv2_mpnn - #early_stopping: - # monitor: *monitor - # min_delta: 0 - # patience: 10 - # mode: &mode min - model_checkpoint: - dirpath: models_checkpoints/PCMQ4Mv2/ - filename: *name - #monitor: *monitor - #mode: *mode - save_top_k: 1 - every_n_epochs: 100 - trainer: - max_epochs: *max_epochs - min_epochs: 1 - check_val_every_n_epoch: 20 diff --git a/expts/hydra-configs/main.yaml b/expts/hydra-configs/main.yaml index e5a78fdfc..903b7fe1d 100644 --- a/expts/hydra-configs/main.yaml +++ b/expts/hydra-configs/main.yaml @@ -1,7 +1,7 @@ defaults: - accelerator: ipu - dataset: pcqm4m - - model: gpspp + - model: mpnn # Specializations - experiment: ${dataset}_${model} From 96a4d913187622df25ee0dae5d0eec07779c4c33 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Fri, 28 Jul 2023 08:54:56 +0000 Subject: [PATCH 10/12] minor change --- expts/hydra-configs/accelerator/ipu.yaml | 3 ++- expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml | 3 ++- expts/hydra-configs/dataset/pcqm4m.yaml | 5 +++-- expts/hydra-configs/experiment/pcqm4m_gpspp.yaml | 3 ++- expts/hydra-configs/experiment/pcqm4m_mpnn.yaml | 3 ++- expts/hydra-configs/main.yaml | 4 ++-- expts/hydra-configs/model/mpnn.yaml | 3 ++- 7 files changed, 15 insertions(+), 9 deletions(-) diff --git a/expts/hydra-configs/accelerator/ipu.yaml b/expts/hydra-configs/accelerator/ipu.yaml index 372297e4c..8fb9b07f6 100644 --- a/expts/hydra-configs/accelerator/ipu.yaml +++ b/expts/hydra-configs/accelerator/ipu.yaml @@ -13,4 +13,5 @@ ipu_inference_config: # gradient accumulation was set to 1 in the code - deviceIterations(1) - replicationFactor(1) - - Precision.enableStochasticRounding(False) \ No newline at end of file + - Precision.enableStochasticRounding(False) + \ No newline at end of file 
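The `ipu_config` / `ipu_inference_config` entries above are written as poptorch option calls in string form. For readers unfamiliar with that API, the inference entries correspond roughly to the direct calls sketched below; this mapping is an assumption about intent, not Graphium's actual option-parsing code.

```python
# Rough poptorch equivalent of the ipu_inference_config entries (assumption).
import poptorch

opts = poptorch.Options()
opts.deviceIterations(1)                        # one device iteration per step at inference
opts.replicationFactor(1)                       # no replication at inference
opts.Precision.enableStochasticRounding(False)  # deterministic rounding for evaluation
```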
diff --git a/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml index ebe372605..010327262 100644 --- a/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml +++ b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml @@ -22,4 +22,5 @@ predictor: trainer: trainer: precision: 16-true - accumulate_grad_batches: 4 \ No newline at end of file + accumulate_grad_batches: 4 + \ No newline at end of file diff --git a/expts/hydra-configs/dataset/pcqm4m.yaml b/expts/hydra-configs/dataset/pcqm4m.yaml index 41194ee2f..411287929 100644 --- a/expts/hydra-configs/dataset/pcqm4m.yaml +++ b/expts/hydra-configs/dataset/pcqm4m.yaml @@ -13,7 +13,7 @@ datamodule: # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly smiles_col: "cxsmiles" label_cols: ["homo_lumo_gap"] - sample_size: 8000 # use sample_size for test + # sample_size: 8000 # use sample_size for test splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` split_names: ["train", "valid", "test-dev"] # graphium/data/PCQM4Mv2/split_dict.pt @@ -218,4 +218,5 @@ trainer: trainer: max_epochs: *max_epochs min_epochs: 1 - check_val_every_n_epoch: 20 \ No newline at end of file + check_val_every_n_epoch: 20 + \ No newline at end of file diff --git a/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml b/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml index d0e4e3a96..d8fceb8fc 100644 --- a/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml +++ b/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml @@ -10,4 +10,5 @@ constants: trainer: model_checkpoint: - dirpath: models_checkpoints/PCMQ4Mv2/gpspp/ \ No newline at end of file + dirpath: models_checkpoints/PCMQ4Mv2/gpspp/ + \ No newline at end of file diff --git a/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml index 28eda3bdf..6621137f8 100644 --- a/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml +++ b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml @@ -10,4 +10,5 @@ constants: trainer: model_checkpoint: - dirpath: models_checkpoints/PCMQ4Mv2/mpnn/ \ No newline at end of file + dirpath: models_checkpoints/PCMQ4Mv2/mpnn/ + \ No newline at end of file diff --git a/expts/hydra-configs/main.yaml b/expts/hydra-configs/main.yaml index 903b7fe1d..198bccb0c 100644 --- a/expts/hydra-configs/main.yaml +++ b/expts/hydra-configs/main.yaml @@ -1,7 +1,7 @@ defaults: - accelerator: ipu - - dataset: pcqm4m - - model: mpnn + - dataset: toymix + - model: gcn # Specializations - experiment: ${dataset}_${model} diff --git a/expts/hydra-configs/model/mpnn.yaml b/expts/hydra-configs/model/mpnn.yaml index 0d322ade3..0ac1432ed 100644 --- a/expts/hydra-configs/model/mpnn.yaml +++ b/expts/hydra-configs/model/mpnn.yaml @@ -13,4 +13,5 @@ architecture: out_dim_edges: 128 attn_type: "none" # "full-attention", "none" # biased_attention: false - attn_kwargs: null \ No newline at end of file + attn_kwargs: null + \ No newline at end of file From 83aa388d2647629fe2ddecff018b62d55bf94ca9 Mon Sep 17 00:00:00 2001 From: zhiyil-graphcore Date: Fri, 28 Jul 2023 08:58:41 +0000 Subject: [PATCH 11/12] remove space for new line --- expts/hydra-configs/accelerator/ipu.yaml | 1 - expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml | 1 - expts/hydra-configs/dataset/pcqm4m.yaml | 1 - expts/hydra-configs/experiment/pcqm4m_gpspp.yaml | 1 - expts/hydra-configs/experiment/pcqm4m_mpnn.yaml 
| 1 - expts/hydra-configs/model/mpnn.yaml | 1 - 6 files changed, 6 deletions(-) diff --git a/expts/hydra-configs/accelerator/ipu.yaml b/expts/hydra-configs/accelerator/ipu.yaml index 8fb9b07f6..43e4455ef 100644 --- a/expts/hydra-configs/accelerator/ipu.yaml +++ b/expts/hydra-configs/accelerator/ipu.yaml @@ -14,4 +14,3 @@ ipu_inference_config: - deviceIterations(1) - replicationFactor(1) - Precision.enableStochasticRounding(False) - \ No newline at end of file diff --git a/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml index 010327262..31cba7fc3 100644 --- a/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml +++ b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml @@ -23,4 +23,3 @@ trainer: trainer: precision: 16-true accumulate_grad_batches: 4 - \ No newline at end of file diff --git a/expts/hydra-configs/dataset/pcqm4m.yaml b/expts/hydra-configs/dataset/pcqm4m.yaml index 411287929..391bb21de 100644 --- a/expts/hydra-configs/dataset/pcqm4m.yaml +++ b/expts/hydra-configs/dataset/pcqm4m.yaml @@ -219,4 +219,3 @@ trainer: max_epochs: *max_epochs min_epochs: 1 check_val_every_n_epoch: 20 - \ No newline at end of file diff --git a/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml b/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml index d8fceb8fc..a321e835d 100644 --- a/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml +++ b/expts/hydra-configs/experiment/pcqm4m_gpspp.yaml @@ -11,4 +11,3 @@ constants: trainer: model_checkpoint: dirpath: models_checkpoints/PCMQ4Mv2/gpspp/ - \ No newline at end of file diff --git a/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml index 6621137f8..08b1b1f3c 100644 --- a/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml +++ b/expts/hydra-configs/experiment/pcqm4m_mpnn.yaml @@ -11,4 +11,3 @@ constants: trainer: model_checkpoint: dirpath: models_checkpoints/PCMQ4Mv2/mpnn/ - \ No newline at end of file diff --git a/expts/hydra-configs/model/mpnn.yaml b/expts/hydra-configs/model/mpnn.yaml index 0ac1432ed..4a8a428e8 100644 --- a/expts/hydra-configs/model/mpnn.yaml +++ b/expts/hydra-configs/model/mpnn.yaml @@ -14,4 +14,3 @@ architecture: attn_type: "none" # "full-attention", "none" # biased_attention: false attn_kwargs: null - \ No newline at end of file From f60b9688cf60a5c9674de9eff195a69aec988ad4 Mon Sep 17 00:00:00 2001 From: Zhiyi Li <86362692+zhiyil-graphcore@users.noreply.github.com> Date: Fri, 28 Jul 2023 14:55:04 +0100 Subject: [PATCH 12/12] Update pcqm4m_ipu.yaml --- expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml index 31cba7fc3..6502f9414 100644 --- a/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml +++ b/expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml @@ -11,7 +11,7 @@ datamodule: max_num_nodes_per_graph: 30 # valid max nodes: 51, max_edges: 118 max_num_edges_per_graph: 120 # Data handling-related - batch_size_training: 64 + batch_size_training: 32 batch_size_inference: 16 predictor: @@ -22,4 +22,4 @@ predictor: trainer: trainer: precision: 16-true - accumulate_grad_batches: 4 + accumulate_grad_batches: 2
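The final commit halves both `batch_size_training` (64 to 32) and `accumulate_grad_batches` (4 to 2) for the PCQM4M IPU setup. Under the usual IPU convention that one weight update consumes micro-batch x gradient-accumulation x replication-factor samples, and assuming the replication factor of 16 from the earlier removed training config still applies, the numbers work out as in the sketch below; the replication value is an assumption, not something set in this patch.

```python
# Back-of-the-envelope effective batch size after this commit (assumptions noted above).
micro_batch = 32       # batch_size_training
accumulation = 2       # accumulate_grad_batches
replication = 16       # assumed training replicationFactor
print(micro_batch * accumulation * replication)  # 1024 samples per weight update
```

Under the same assumptions, the previous values (64 and 4) gave 4096 samples per update, so this change reduces the effective batch size by a factor of four.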