Added and integrated C++ graphium_cpp library, a Python module implem… #510

Merged
merged 54 commits on Jul 9, 2024
Changes from all commits
Commits (54)
5ffe261
Added and integrated C++ graphium_cpp library, a Python module implem…
ndickson-nvidia Apr 13, 2024
8286383
Small changes to support not needing label data during data loading
ndickson-nvidia Apr 17, 2024
dca9b2b
Removed FakeDataset, FakeDataModule, and SingleTaskDataset. SingleTa…
ndickson-nvidia Apr 17, 2024
8304210
Removed option to featurize using Python, (but didn't delete everythi…
ndickson-nvidia Apr 17, 2024
4ee35d4
Removed newly deprecated options from yaml files
ndickson-nvidia Apr 18, 2024
cf23e37
Added support for limiting the number of threads used by prepare_and_…
ndickson-nvidia Apr 18, 2024
5db0e2a
Fixed compiler warning about signed vs. unsigned comparison
ndickson-nvidia Apr 18, 2024
c75a452
Fixed Python syntax issues
ndickson-nvidia Apr 18, 2024
4aa1f85
Changed asymmetric inverse normalization type to be implemented using…
ndickson-nvidia Apr 18, 2024
c53451a
Fixed compile errors
ndickson-nvidia Apr 18, 2024
268e245
Some simplification in collate.py
ndickson-nvidia Apr 19, 2024
e032e8e
Deleting most of the Python featurization code
ndickson-nvidia Apr 19, 2024
bdefe89
Implemented conformer generation in get_conformer_features, trying to…
ndickson-nvidia Apr 23, 2024
5298444
Deleted deprecated properties.py
ndickson-nvidia Apr 23, 2024
c38aa06
Handle case of no label data in prepare_and_save_data. Also added con…
ndickson-nvidia Apr 25, 2024
86abf21
Changed prepare_data to support having no label data
ndickson-nvidia Apr 25, 2024
80276da
Updated license passed to setup call in setup.py
ndickson-nvidia May 2, 2024
9492e62
Changes to get test_dataset.py and test_multitask_datamodule.py passing
ndickson-nvidia May 6, 2024
d94097c
Removed load_type option from test_training.py, because it's no longe…
ndickson-nvidia May 6, 2024
11e6935
Updated comment in setup.py about how to build graphium_cpp package
ndickson-nvidia May 14, 2024
ff93c2d
Rewrote test_featurizer.py. Fixed bug in mask_nans C++ function, and …
ndickson-nvidia May 14, 2024
a892068
Removed deprecation warnings and deprecated parameters from datamodul…
ndickson-nvidia May 23, 2024
38a5510
Recommended tweaks to extract_labels in multilevel_utils.py
ndickson-nvidia May 23, 2024
f7771b3
Fixed "else if"->"elif"
ndickson-nvidia May 23, 2024
4256839
Rewrote test_pe_nodepair.py to use graphium_cpp
ndickson-nvidia May 24, 2024
91c37a3
Rewrote test_pe_rw.py to use graphium_cpp. Comment update in test_pe_…
ndickson-nvidia May 24, 2024
f347a0d
Rewrote test_pe_spectral.py to use graphium_cpp
ndickson-nvidia May 24, 2024
26b5531
Removed tests/test_positional_encodings.py, because it's a duplicate …
ndickson-nvidia May 24, 2024
1ded38b
Fixed handling of disconnected components vs. single component for la…
ndickson-nvidia May 28, 2024
314d636
Fixed compile warnings in one_hot.cpp
ndickson-nvidia May 28, 2024
e49b4da
Rewrote test_positional_encoders.py, though it's still failing the te…
ndickson-nvidia May 28, 2024
f001464
Removed commented out lines from setup.py
ndickson-nvidia Jun 4, 2024
2782fbc
Ran linting on Python files
ndickson-nvidia Jun 4, 2024
77d27b5
Hopefully explicitly installing graphium_cpp fixes the automated test…
ndickson-nvidia Jun 5, 2024
cb1df19
Test fix
ndickson-nvidia Jun 5, 2024
f3f6a0d
Another test fix
ndickson-nvidia Jun 5, 2024
c5c0085
Another test fix
ndickson-nvidia Jun 5, 2024
6dd827f
Make sure RDKit can find Boost headers
ndickson-nvidia Jun 5, 2024
59c84a2
Reimplemented test_pos_transfer_funcs.py to test all supported conver…
ndickson-nvidia Jun 12, 2024
7bc8ade
Linting fixes
ndickson-nvidia Jun 12, 2024
6903243
Fixed collections.abs.Callable to typing.Callable for type hint
ndickson-nvidia Jun 12, 2024
9f38afb
Removed file_opener and its test
ndickson-nvidia Jun 17, 2024
5ab9ca9
Fixed the issue with boolean masking, introduced by `F._canonical_mas…
DomInvivo Jul 9, 2024
9c7504f
Fixed the float vs double issue in laplacian pos encoding
DomInvivo Jul 9, 2024
f8358f3
Added comment
DomInvivo Jul 9, 2024
692decc
Fixed the ipu tests by making sure that `IPUStrategy` is not imported…
DomInvivo Jul 9, 2024
8891e66
Update test.yml to only test python 3.10
DomInvivo Jul 9, 2024
c2d3c87
Removed positional encodings from the docs
DomInvivo Jul 9, 2024
d3d19d7
Merge remote-tracking branch 'origin/dom_unittest' into dom_unittest
DomInvivo Jul 9, 2024
0a1696f
Upgraded python versions in the tests
DomInvivo Jul 9, 2024
50265df
Removed reference to old files now in C++
DomInvivo Jul 9, 2024
58fc2aa
Downgraded python version
DomInvivo Jul 9, 2024
5852467
Fixed other docs broken references
DomInvivo Jul 9, 2024
ea9a775
Merge pull request #1 from ndickson-nvidia/dom_unittest
ndickson-nvidia Jul 9, 2024
5 changes: 4 additions & 1 deletion .github/workflows/test.yml
@@ -16,7 +16,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.09", "3.10", "3.11"]
pytorch-version: ["2.0"]

runs-on: "ubuntu-latest"
@@ -49,6 +49,9 @@ jobs:
- name: Install library
run: python -m pip install --no-deps -e . # `-e` required for correct `coverage` run.

- name: Install C++ library
run: cd graphium/graphium_cpp && git clone https://github.com/pybind/pybind11.git && export PYTHONPATH=$PYTHONPATH:./pybind11 && python -m pip install . && cd ../..

- name: Run tests
run: pytest -m 'not ipu'

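The new "Install C++ library" step clones pybind11 alongside the graphium/graphium_cpp sources and pip-installs the extension before the tests run. A minimal post-install smoke test, given here only as an illustrative sketch (the script name and the idea of listing exports are assumptions, not part of this PR), relies solely on the built module being importable as graphium_cpp:

# smoke_test_graphium_cpp.py -- hypothetical check that the pybind11 extension built correctly
import importlib

# Import the compiled extension; this fails if the "pip install ." step above did not build it.
graphium_cpp = importlib.import_module("graphium_cpp")

# Report where the shared library was installed and which names it exposes.
print("Loaded from:", graphium_cpp.__file__)
print("Exports:", [name for name in dir(graphium_cpp) if not name.startswith("_")])
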
1 change: 1 addition & 0 deletions LICENSE
@@ -189,6 +189,7 @@
Copyright 2023 Valence Labs
Copyright 2023 Recursion Pharmaceuticals
Copyright 2023 Graphcore Limited
Copyright 2024 NVIDIA CORPORATION & AFFILIATES

Various Academic groups have also contributed to this software under
the given license. These include, but are not limited, to the following
29 changes: 0 additions & 29 deletions docs/api/graphium.features.md
@@ -5,37 +5,8 @@ Feature extraction and manipulation
=== "Contents"

* [Featurizer](#featurizer)
* [Positional Encoding](#positional-encoding)
* [Properties](#properties)
* [Spectral PE](#spectral-pe)
* [Random Walk PE](#random-walk-pe)
* [NMP](#nmp)

## Featurizer
------------
::: graphium.features.featurizer


## Positional Encoding
------------
::: graphium.features.positional_encoding


## Properties
------------
::: graphium.features.properties


## Spectral PE
------------
::: graphium.features.spectral


## Random Walk PE
------------
::: graphium.features.rw


## NMP
------------
::: graphium.features.nmp
4 changes: 0 additions & 4 deletions docs/api/graphium.utils.md
@@ -46,10 +46,6 @@ module for utility functions
::: graphium.utils.mup


## Read File
----------------
::: graphium.utils.read_file

## Safe Run
----------------
::: graphium.utils.safe_run
3 changes: 2 additions & 1 deletion env.yml
@@ -28,7 +28,7 @@ dependencies:
- gcsfs >=2021.6

# ML packages
- cuda-version # works also with CPU-only system.
- cuda-version == 11.2 # works also with CPU-only system.
- pytorch >=1.12
- lightning >=2.0
- torchmetrics >=0.7.0,<0.11
@@ -43,6 +43,7 @@ dependencies:
# chemistry
- rdkit
- datamol >=0.10
- boost # needed by rdkit

# Optional deps
- sympy
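env.yml now pins cuda-version to 11.2 and adds boost, since RDKit's C++ headers (and the Boost headers they pull in) are needed to compile graphium_cpp. A quick environment sanity check, given purely as an illustrative sketch (the script and its checks are not part of the PR), could look like:

# check_env.py -- rough sanity check of the conda environment described in env.yml
import torch
from rdkit import Chem

# The cuda-version pin only matters for GPU builds of PyTorch;
# torch.version.cuda is None on CPU-only installs, which env.yml also supports.
print("PyTorch:", torch.__version__, "CUDA:", torch.version.cuda)

# RDKit must be importable and functional, since graphium_cpp compiles and links against it.
print("RDKit parses benzene:", Chem.MolFromSmiles("c1ccccc1") is not None)
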
6 changes: 0 additions & 6 deletions expts/configs/config_gps_10M_pcqm4m.yaml
@@ -59,7 +59,6 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
homolumo:
@@ -76,10 +75,6 @@ datamodule:
split_test: 0.1

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
# 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring',
@@ -115,7 +110,6 @@ datamodule:
num_workers: 0 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
featurization_backend: "loky"


architecture:
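The featurization knobs removed above (prepare_dict_or_graph, featurization_n_jobs, featurization_progress, featurization_backend) disappear from every config touched by this PR, because featurization now runs inside graphium_cpp rather than in Python worker processes; the same pattern repeats in the config files below. As an illustrative sketch only (the dict keys are copied from the YAML above; the values and the surrounding code are assumptions, not the PR's code), the trimmed datamodule arguments would be built roughly like this:

# Illustrative reconstruction of the slimmed-down datamodule args after this PR.
datamodule_args = {
    "task_specific_args": {
        "homolumo": {
            # Dataset files, label columns, and split fractions stay as in the YAML.
            "split_val": 0.1,
            "split_test": 0.1,
        },
    },
    # The featurization block itself survives; only the removed Python-side options are gone.
    "featurization": {},
    "num_workers": 0,
    "persistent_workers": False,
}
# These args would be passed to the MultitaskFromSmilesDataModule named in the config;
# constructor details beyond the keys shown are not reproduced from this diff.
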
6 changes: 0 additions & 6 deletions expts/configs/config_gps_10M_pcqm4m_mod.yaml
@@ -8,7 +8,6 @@ constants:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
homolumo:
@@ -25,10 +24,6 @@ datamodule:
split_test: 0.1

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
# 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring',
@@ -84,7 +79,6 @@ datamodule:
num_workers: 0 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
featurization_backend: "loky"

# ipu_dataloader_training_opts:
# mode: async
7 changes: 0 additions & 7 deletions expts/configs/config_mpnn_10M_b3lyp.yaml
@@ -60,7 +60,6 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
betagap:
@@ -88,12 +87,7 @@ datamodule:
split_test: 0.1

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
processed_graph_data_path: "../datacache/b3lyp/"
dataloading_from: ram
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
# 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring',
@@ -127,7 +121,6 @@ datamodule:
num_workers: 0 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
featurization_backend: "loky"


architecture:
7 changes: 0 additions & 7 deletions expts/configs/config_mpnn_pcqm4m.yaml
@@ -8,7 +8,6 @@ constants:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
homolumo:
@@ -26,12 +25,7 @@ datamodule:
split_names: ["train", "valid", "test-dev"]

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 20
featurization_progress: True
featurization_backend: "loky"
processed_graph_data_path: "graphium/data/PCQM4Mv2/"
dataloading_from: ram
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
# 'possible_number_radical_e', 'possible_is_aromatic', 'possible_is_in_ring',
@@ -61,7 +55,6 @@ datamodule:
num_workers: 40 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
# Using persistent_workers false might make the start of each epoch very long.
featurization_backend: "loky"

# ipu_dataloader_training_opts:
# mode: async
5 changes: 0 additions & 5 deletions expts/hydra-configs/architecture/largemix.yaml
@@ -83,12 +83,7 @@ architecture:
datamodule:
module_type: "MultitaskFromSmilesDataModule"
args:
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 20
featurization_progress: True
featurization_backend: "loky"
processed_graph_data_path: ${constants.datacache_path}
dataloading_from: "disk"
num_workers: 20 # -1 to use all
persistent_workers: True
featurization:
5 changes: 0 additions & 5 deletions expts/hydra-configs/architecture/pcqm4m.yaml
@@ -81,13 +81,8 @@ architecture:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
processed_graph_data_path: ${constants.datacache_path}
num_workers: 40 # -1 to use all
persistent_workers: False # if use persistent worker at the start of each epoch.
5 changes: 0 additions & 5 deletions expts/hydra-configs/architecture/toymix.yaml
@@ -74,12 +74,7 @@ architecture:
datamodule:
module_type: "MultitaskFromSmilesDataModule"
args:
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
processed_graph_data_path: ${constants.datacache_path}
dataloading_from: ram
num_workers: 30 # -1 to use all
persistent_workers: False
featurization:
1 change: 0 additions & 1 deletion expts/hydra-configs/finetuning/admet_baseline.yaml
@@ -20,7 +20,6 @@ constants:
datamodule:
args:
batch_size_training: 32
dataloading_from: ram
persistent_workers: true
num_workers: 4

@@ -25,7 +25,6 @@ metrics:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
homolumo:
@@ -4,7 +4,6 @@ datamodule:
args:
batch_size_training: 200
batch_size_inference: 200
featurization_n_jobs: 20
num_workers: 20

predictor:
@@ -7,7 +7,6 @@ datamodule:
args:
batch_size_training: 2048
batch_size_inference: 2048
featurization_n_jobs: 6
num_workers: 6

predictor:
1 change: 0 additions & 1 deletion expts/hydra-configs/training/accelerator/toymix_cpu.yaml
@@ -4,7 +4,6 @@ datamodule:
args:
batch_size_training: 200
batch_size_inference: 200
featurization_n_jobs: 4
num_workers: 4

predictor:
1 change: 0 additions & 1 deletion expts/hydra-configs/training/accelerator/toymix_gpu.yaml
@@ -7,7 +7,6 @@ datamodule:
args:
batch_size_training: 200
batch_size_inference: 200
featurization_n_jobs: 4
num_workers: 4

predictor:
6 changes: 0 additions & 6 deletions expts/neurips2023_configs/base_config/large.yaml
@@ -62,7 +62,6 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
l1000_vcap:
@@ -133,11 +132,6 @@ datamodule:
epoch_sampling_fraction: 1.0

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
dataloading_from: disk
processed_graph_data_path: ${constants.datacache_path}
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
6 changes: 0 additions & 6 deletions expts/neurips2023_configs/base_config/large_pcba.yaml
@@ -62,7 +62,6 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"

@@ -132,11 +131,6 @@ datamodule:
#epoch_sampling_fraction: 1.0

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
dataloading_from: disk
processed_graph_data_path: ${constants.datacache_path}
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
6 changes: 0 additions & 6 deletions expts/neurips2023_configs/base_config/large_pcqm_g25.yaml
@@ -62,7 +62,6 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"

@@ -132,11 +131,6 @@ datamodule:
# epoch_sampling_fraction: 1.0

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
dataloading_from: disk
processed_graph_data_path: ${constants.datacache_path}
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
6 changes: 0 additions & 6 deletions expts/neurips2023_configs/base_config/large_pcqm_n4.yaml
@@ -62,7 +62,6 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"

@@ -132,11 +131,6 @@ datamodule:
epoch_sampling_fraction: 1.0

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
dataloading_from: disk
processed_graph_data_path: ${constants.datacache_path}
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),
5 changes: 0 additions & 5 deletions expts/neurips2023_configs/base_config/small.yaml
@@ -51,7 +51,6 @@ accelerator:

datamodule:
module_type: "MultitaskFromSmilesDataModule"
# module_type: "FakeDataModule" # Option to use generated data
args: # Matches that in the test_multitask_datamodule.py case.
task_specific_args: # To be replaced by a new class "DatasetParams"
qm9:
@@ -97,10 +96,6 @@ datamodule:
method: "normal"

# Featurization
prepare_dict_or_graph: pyg:graph
featurization_n_jobs: 30
featurization_progress: True
featurization_backend: "loky"
processed_graph_data_path: "../datacache/neurips2023-small/"
featurization:
# OGB: ['atomic_num', 'degree', 'possible_formal_charge', 'possible_numH' (total-valence),