Skip to content

Commit

Permalink
Merge pull request #410 from datamol-io/ipu_dataloader_test
Browse files Browse the repository at this point in the history
Ipu dataloader test
  • Loading branch information
DomInvivo authored Aug 1, 2023
2 parents 66c7adc + f6f1092 commit ddec593
Show file tree
Hide file tree
Showing 14 changed files with 172 additions and 131 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
- "*"
- "!gh-pages"
schedule:
- cron: "0 4 * * MON"
- cron: "0 4 * * *"

jobs:
test:
Expand Down
9 changes: 7 additions & 2 deletions .github/workflows/test_ipu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
- "*"
- "!gh-pages"
schedule:
- cron: "0 4 * * MON"
- cron: "0 4 * * *"

jobs:
test-ipu:
Expand Down Expand Up @@ -46,11 +46,16 @@ jobs:
python -c "import poptorch"
# Download the data files (~10 MB in total - negligible compared to the libraries)
wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz
wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz
wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9.csv.gz
# Install the IPU specific and graphium requirements
pip install -r requirements_ipu.txt
# Install Graphium in dev mode
python -m pip install --no-deps -e .
python3 -m pytest -m 'not skip_ipu'
- name: Test CLI
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ mkdir ~/.venv # Create the folder for the environm
python3 -m venv ~/.venv/graphium_ipu # Create the environment
source ~/.venv/graphium_ipu/bin/activate # Activate the environment

python3 -m pip install --upgrade pip
# We can download the Poplar SDK directly using `wget` - more details on the various Graphcore downloads can be found here `https://www.graphcore.ai/downloads`

# NOTE: For simplicity, this will download the SDK directly where you run this command; we recommend doing this outside the Graphium directory.
Expand Down
1 change: 1 addition & 0 deletions env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ dependencies:
- pytest >=6.0
- pytest-xdist
- pytest-cov
- pytest-forked
- nbconvert
- black >=23
- jupyterlab
Expand Down
3 changes: 2 additions & 1 deletion expts/neurips2023_configs/config_small_gcn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ constants:
name: &name neurips2023_small_data_gcn
seed: &seed 42
raise_train_error: true # Whether the code should raise an error if it crashes during training
entity: multitask-gnn

accelerator:
type: ipu # cpu or ipu or gpu
Expand Down Expand Up @@ -30,7 +31,7 @@ accelerator:

ipu_config:
- deviceIterations(5) # IPU would require large batches to be ready for the model.
- replicationFactor(16)
- replicationFactor(1)
# - enableProfiling("graph_analyser") # The folder where the profile will be stored
# - enableExecutableCaching("pop_compiler_cache")
- TensorLocations.numIOTiles(128)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ include = '\.pyi?$'

[tool.pytest.ini_options]
minversion = "6.0"
addopts = "--verbose --durations=10 -n auto --cov=graphium --cov-fail-under=60 --cov-report xml --cov-report term"
addopts = "--verbose --durations=10 -n 1 --cov=graphium --cov-fail-under=60 --cov-report xml --cov-report term"
testpaths = ["tests"]
filterwarnings = [
"ignore::DeprecationWarning:ray.*:",
Expand Down
3 changes: 2 additions & 1 deletion requirements_ipu.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ umap-learn
pytest >=6.0
pytest-cov
pytest-xdist
pytest-forked
black >=23
jupyterlab
ipywidgets
Expand Down Expand Up @@ -53,4 +54,4 @@ fastparquet
torch-scatter==2.1.1
torch-sparse==0.6.17
lightning @ git+https://github.com/Lightning-AI/lightning@ca30fd7752582201a3966806c92e3acbbaf2a045
lightning-graphcore @ git+https://github.com/Lightning-AI/lightning-Graphcore
lightning-graphcore @ git+https://github.com/s-maddrellmander/lightning-Graphcore.git
Empty file added tests/__init__.py
Empty file.
1 change: 1 addition & 0 deletions tests/config_test_ipu_dataloader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ metrics:
threshold_kwargs: null

trainer:
seed: *seed
logger:
save_dir: logs/QM9
name: *name
Expand Down
39 changes: 18 additions & 21 deletions tests/config_test_ipu_dataloader_multitask.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ accelerator:

ipu_config:
- deviceIterations(5) # IPU would require large batches to be ready for the model.
- replicationFactor(4)
- replicationFactor(1)
# - enableProfiling("graph_analyser") # The folder where the profile will be stored
# - enableExecutableCaching("pop_compiler_cache")
- TensorLocations.numIOTiles(128)
Expand All @@ -56,13 +56,12 @@ datamodule:
task_specific_args: # To be replaced by a new class "DatasetParams"
qm9:
df: null
df_path: https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9.csv.gz
df_path: qm9.csv.gz
# wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9.csv.gz
# or set path as the URL directly
smiles_col: "smiles"
label_cols: ["A", "B", "C", "mu", "alpha", "homo", "lumo", "gap", "r2", "zpve", "u0", "u298", "h298", "g298", "cv", "u0_atom", "u298_atom", "h298_atom", "g298_atom"]
sample_size: 2000 # use sample_size for test
splits_path: data/neurips2023/small-dataset/qm9_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9_random_splits.pt`
seed: *seed
task_level: graph
label_normalization:
Expand All @@ -71,26 +70,24 @@ datamodule:

tox21:
df: null
df_path: https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz
df_path: Tox21-7k-12-labels.csv.gz
# wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz
# or set path as the URL directly
smiles_col: "smiles"
label_cols: ["NR-AR", "NR-AR-LBD", "NR-AhR", "NR-Aromatase", "NR-ER", "NR-ER-LBD", "NR-PPAR-gamma", "SR-ARE", "SR-ATAD5", "SR-HSE", "SR-MMP", "SR-p53"]
sample_size: 2000 # use sample_size for test
splits_path: data/neurips2023/small-dataset/Tox21_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21_random_splits.pt`
seed: *seed
task_level: graph

zinc:
df: null
df_path: https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz
df_path: ZINC12k.csv.gz
# df_path: data/neurips2023/small-dataset/ZINC12k.csv.gz
# wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz
# or set path as the URL directly
smiles_col: "smiles"
label_cols: ["SA", "logp", "score"]
sample_size: 2000 # use sample_size for test
splits_path: data/neurips2023/small-dataset/ZINC12k_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k_random_splits.pt`
seed: *seed
task_level: graph
label_normalization:
Expand Down Expand Up @@ -144,17 +141,17 @@ architecture:
model_type: FullGraphMultiTaskNetwork
mup_base_path: null
pre_nn: # Set as null to avoid a pre-nn network
out_dim: 64
hidden_dims: 256
depth: 2
out_dim: 16
hidden_dims: 16
depth: 1
activation: relu
last_activation: none
dropout: &dropout 0.18
dropout: &dropout 0.1
normalization: &normalization layer_norm
last_normalization: *normalization
residual_type: none

pre_nn_edges: null # Set as null to avoid a pre-nn network
pre_nn_edges: null # Set as null to avoid a pre-nn network

pe_encoders:
out_dim: 32
Expand Down Expand Up @@ -186,10 +183,10 @@ architecture:


gnn: # Set as null to avoid a GNN network
in_dim: 64 # or otherwise the correct value
out_dim: &gnn_dim 96
in_dim: 16 # or otherwise the correct value
out_dim: &gnn_dim 16
hidden_dims: *gnn_dim
depth: 4
depth: 1
activation: gelu
last_activation: none
dropout: 0.1
Expand Down Expand Up @@ -218,8 +215,8 @@ architecture:
qm9:
task_level: graph
out_dim: 19
hidden_dims: 128
depth: 2
hidden_dims: 16
depth: 1
activation: relu
last_activation: none
dropout: *dropout
Expand All @@ -229,8 +226,8 @@ architecture:
tox21:
task_level: graph
out_dim: 12
hidden_dims: 64
depth: 2
hidden_dims: 16
depth: 1
activation: relu
last_activation: sigmoid
dropout: *dropout
Expand All @@ -240,7 +237,7 @@ architecture:
zinc:
task_level: graph
out_dim: 3
hidden_dims: 32
hidden_dims: 16
depth: 2
activation: relu
last_activation: none
Expand All @@ -265,7 +262,7 @@ predictor:
# weight_decay: 1.e-7
torch_scheduler_kwargs:
module_type: WarmUpLinearLR
max_num_epochs: &max_epochs 3
max_num_epochs: &max_epochs 1
warmup_epochs: 1
verbose: False
scheduler_kwargs:
Expand Down
Loading

0 comments on commit ddec593

Please sign in to comment.