-
Notifications
You must be signed in to change notification settings - Fork 266
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GraphScope on 3/3 part 1: add local/dist trainer, and a Data
class to make the example simpler
#234
Changes from 1 commit
10ad969
b742d6a
06f969f
ee8e2cd
4dac6c8
b655e23
fc7aa41
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,3 +8,5 @@ examples/tf/*.tar.gz | |
# core dump | ||
/core | ||
|
||
# ignore generated __init__.py | ||
/__init__.py |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# Copyright 2021 Alibaba Group Holding Limited. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================= | ||
|
||
import os | ||
import sys | ||
|
||
try: | ||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..')) | ||
|
||
try: | ||
import graphlearn.python.nn.pytorch | ||
from .pytorch.gcn.gcn import GCN as TorchGCN | ||
except Exception: | ||
pass | ||
|
||
try: | ||
import graphlearn.python.nn.tf | ||
|
||
from .tf.trainer import LocalTrainer, DistTrainer | ||
|
||
# backwards compatibility | ||
LocalTFTrainer = LocalTrainer | ||
DistTFTrainer = DistTrainer | ||
|
||
from .tf.bipartite_sage.bipartite_sage import BipartiteGraphSAGE | ||
from .tf.bipartite_sage.hetero_edge_inducer import HeteroEdgeInducer | ||
from .tf.ego_bipartite_sage.ego_bipartite_sage import EgoBipartiteGraphSAGE | ||
from .tf.ego_gat.ego_gat import EgoGAT | ||
from .tf.ego_rgcn.ego_rgcn import EgoRGCN | ||
from .tf.ego_sage.ego_sage import EgoGraphSAGE | ||
from .tf.sage.edge_inducer import EdgeInducer | ||
from .tf.seal.edge_cn_inducer import EdgeCNInducer | ||
from .tf.ultra_gcn.ultra_gcn import UltraGCN | ||
except: | ||
pass | ||
|
||
finally: | ||
sys.path.pop(sys.path.index(os.path.join(os.path.dirname(__file__), '..', '..'))) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Copyright 2021-2022 Alibaba Group Holding Limited. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================= |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Copyright 2021-2022 Alibaba Group Holding Limited. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================= |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Copyright 2021-2022 Alibaba Group Holding Limited. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================= | ||
|
||
from .bipartite_sage import BipartiteGraphSAGE | ||
from .hetero_edge_inducer import HeteroEdgeInducer |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Copyright 2021-2022 Alibaba Group Holding Limited. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================= | ||
|
||
from .ego_bipartite_sage import EgoBipartiteGraphSAGE |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
# Copyright 2021 Alibaba Group Holding Limited. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================= | ||
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import print_function | ||
|
||
import argparse | ||
import datetime | ||
import json | ||
import os | ||
import sys | ||
|
||
import numpy as np | ||
try: | ||
# https://www.tensorflow.org/guide/migrate | ||
import tensorflow.compat.v1 as tf | ||
tf.disable_v2_behavior() | ||
except ImportError: | ||
import tensorflow as tf | ||
|
||
import graphlearn as gl | ||
import graphlearn.python.nn.tf as tfg | ||
from graphlearn.python.utils import parse_nbrs_num | ||
|
||
class EgoData: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A rough code of my opinion, just for reference. Base class: class EgoSampleLoaderBase: # just for example, maybe we could find a better class name
def __init__(self, graph, nbr_num, sampler, batch_size, mask="train"):
# ..
if mask == 'train':
tfg.conf.training = True
self.sample_query = self.query(graph, mask)
ds = tfg.Dataset(self.query_train, window=10)
self._iterator = ...
def _query(self):
raise NotImplementedError...
def _format(self, ...):
raise NotImplementedError...
@property
def iterator(self):
return self._iterator
def as_list(self):
return self._format()
@property
def src(self):
return self._data_dict['seed']
def hop(self, idx):
return self._data_dict['hop1'] # Just for example Example inherit class class EgoRGCNSampleLoader:
def _query(self):
# ...
def _format(self):
# ... Usage in train.py graph = g.init()
model = EgoRGCN(...)
train_sample = EgoRGCNSampleLoader(g, nbr_num, "random", 128, 'train')
train_emb = model.forward(train_sample.as_list())
loss = loss_fn(train_emb, train_sample.src.labels)
trainer = Trainer(train_sample.iterator, loss)
trainer.run()
# for test
test_sample_loader = EgoRGCNSampleLoader(g, nbr_num, "random", 128, 'test')
# ... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks fine, thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is better to put all sampling and data preprocessing into a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
def __init__(self, graph, model, nbrs_num=None, sampler='random', | ||
train_batch_size=128, test_batch_size=128, val_batch_size=128): | ||
self.graph = graph | ||
self.model = model | ||
self.nbrs_num = parse_nbrs_num(nbrs_num) | ||
self.train_batch_size = train_batch_size | ||
self.test_batch_size = test_batch_size | ||
self.val_batch_size = val_batch_size | ||
self.sampler = sampler | ||
|
||
# train | ||
tfg.conf.training = True | ||
self.query_train = self.query(self.graph, gl.Mask.TRAIN) | ||
self.dataset_train = tfg.Dataset(self.query_train, window=10) | ||
self.train_iterator = self.dataset_train.iterator | ||
self.train_dict = self.dataset_train.get_data_dict() | ||
self.train_embedding = self.model.forward( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's better not to encapsulate the model training into the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is moving There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Move all model-related data outside. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
self.reformat_node_feature( | ||
self.train_dict, | ||
self.query_train.list_alias(), | ||
tfg.FeatureHandler('feature_handler', self.query_train.get_node("train").decoder.feature_spec), | ||
), | ||
self.nbrs_num | ||
) | ||
|
||
# test | ||
tfg.conf.training = False | ||
self.query_test = self.query(self.graph, gl.Mask.TEST) | ||
self.dataset_test = tfg.Dataset(self.query_test, window=10) | ||
self.test_iterator = self.dataset_test.iterator | ||
self.test_dict = self.dataset_test.get_data_dict() | ||
self.test_embedding = self.model.forward( | ||
self.reformat_node_feature( | ||
self.test_dict, | ||
self.query_test.list_alias(), | ||
tfg.FeatureHandler('feature_handler', self.query_test.get_node("test").decoder.feature_spec), | ||
), | ||
self.nbrs_num | ||
) | ||
|
||
# val | ||
tfg.conf.training = False | ||
self.query_val = self.query(self.graph, gl.Mask.VAL) | ||
self.dataset_val = tfg.Dataset(self.query_val, window=10) | ||
self.val_iterator = self.dataset_val.iterator | ||
self.val_dict = self.dataset_val.get_data_dict() | ||
self.val_embedding = self.model.forward( | ||
self.reformat_node_feature( | ||
self.val_dict, | ||
self.query_val.list_alias(), | ||
tfg.FeatureHandler('feature_handler', self.query_val.get_node("val").decoder.feature_spec), | ||
), | ||
self.nbrs_num | ||
) | ||
|
||
def query(self, graph, mask=gl.Mask.TRAIN): | ||
""" | ||
""" | ||
|
||
def reformat_node_feature(self, data_dict, alias_list, feature_handler): | ||
""" | ||
""" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Copyright 2021-2022 Alibaba Group Holding Limited. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================= | ||
|
||
from .ego_gat import EgoGAT |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# [GCN](https://arxiv.org/abs/1609.02907) | ||
## Introduction | ||
Here we implement a GCN based on fixed-size neighbor sampling. | |
|
||
## How to run | ||
### Node classification | ||
Here we use the Cora dataset as an example: | |
|
||
1. Prepare data | ||
```shell script | ||
cd ../../data/ | ||
python cora.py | ||
``` | ||
|
||
2. Train | ||
```shell script | ||
cd ../tf/ego_gcn/ | ||
python train_supervised.py | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Copyright 2021-2022 Alibaba Group Holding Limited. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================= | ||
|
||
from .ego_gcn import EgoGCN | ||
from .ego_gcn_data import EgoGCNData |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ego_data.py -> ego_data_loader.py
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, renamed
ego_rgcn_data_loader.py
and ego_sage_data_loader.py
as well.