Skip to content

Commit

Permalink
Merge pull request #419 from datamol-io/pcqm4m_changes
Browse files Browse the repository at this point in the history
pcqm4m changes with mpnn and gps++ configs, fix run_validation_test.py
  • Loading branch information
DomInvivo authored Aug 1, 2023
2 parents b0d4fd5 + f60b968 commit 66c7adc
Show file tree
Hide file tree
Showing 12 changed files with 145 additions and 339 deletions.
227 changes: 0 additions & 227 deletions expts/configs/config_gpspp_10M_pcqm4m.yaml

This file was deleted.

13 changes: 10 additions & 3 deletions expts/hydra-configs/accelerator/ipu.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
# Accelerator settings for IPU runs: `ipu_config` is a list of option calls
# (presumably the training-time options — confirm), and `ipu_inference_config`
# is the list used during inference.
# NOTE(review): this is a diff rendering whose +/- markers were lost, so
# superseded and replacement entries both appear. The hunk header gives 9 old
# and 16 new lines, so the first entry of each duplicated pair is presumably
# the removed one — confirm against the repository before using this as-is.
type: ipu
ipu_config:
# NOTE(review): presumably the removed pre-commit value, superseded by 30 below.
- deviceIterations(5) # IPU would require large batches to be ready for the model.
- deviceIterations(30) # IPU would require large batches to be ready for the model.
- replicationFactor(16)
# - enableProfiling("graph_analyser") # The folder where the profile will be stored
# - enableExecutableCaching("pop_compiler_cache")
- TensorLocations.numIOTiles(128)
# NOTE(review): the next two entries are presumably the removed pre-commit
# lines; the buffering depth of 128 is superseded by 96 in the pair after them.
- _Popart.set("defaultBufferingDepth", 128)
- Precision.enableStochasticRounding(True)
- _Popart.set("defaultBufferingDepth", 96)
- Precision.enableStochasticRounding(True)

ipu_inference_config:
# set device iteration and replication factor to 1 during inference
# gradient accumulation was set to 1 in the code
- deviceIterations(1)
- replicationFactor(1)
- Precision.enableStochasticRounding(False)
25 changes: 25 additions & 0 deletions expts/hydra-configs/dataset/accelerator/pcqm4m_ipu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# @package _global_

# IPU-specific overrides for the PCQM4M dataset: per-graph node/edge limits and
# batch sizes for the datamodule, predictor logging/optimizer options, and
# trainer precision settings.
# NOTE(review): the nesting below was reconstructed from a view that had lost
# its indentation (left flat, `mode`, `max_num_*_per_graph` and `trainer`
# would be duplicate keys) — confirm against the consuming schema.
datamodule:
  args:
    # Dataloader options applied to training batches.
    ipu_dataloader_training_opts:
      mode: async
      # NOTE(review): set below the dataset maximum quoted in the inline
      # comment; presumably a per-graph budget, not a hard cap — confirm.
      max_num_nodes_per_graph: 16  # train max nodes: 20, max_edges: 54
      max_num_edges_per_graph: 60
    # Dataloader options applied to inference (validation) batches.
    ipu_dataloader_inference_opts:
      mode: async
      # NOTE(review): also below the quoted maximum — same caveat as above.
      max_num_nodes_per_graph: 30  # valid max nodes: 51, max_edges: 118
      max_num_edges_per_graph: 120
    # Data handling-related
    batch_size_training: 32
    batch_size_inference: 16

predictor:
  metrics_every_n_train_steps: 1000
  optim_kwargs:
    loss_scaling: 1024

# The outer `trainer` is the config group; the inner `trainer` mapping holds
# the settings themselves (presumably forwarded to the trainer — confirm).
trainer:
  trainer:
    precision: 16-true
    accumulate_grad_batches: 2
Loading

0 comments on commit 66c7adc

Please sign in to comment.