Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NeighborLoader: support temporal sampling with (FeatureStore, GraphStore) #4929

Merged
merged 5 commits into from
Jul 7, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update
  • Loading branch information
mananshah99 committed Jul 7, 2022
commit 10b84d0e65c26c1970662edcb8279376a60d7c4b
13 changes: 4 additions & 9 deletions test/loader/test_neighbor_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,20 +392,15 @@ def test_temporal_custom_neighbor_loader_on_cora(get_dataset, FeatureStore,

loader1 = NeighborLoader(hetero_data, num_neighbors=[-1, -1],
input_nodes='paper', time_attr='time',
batch_size=1)
batch_size=128)

loader2 = NeighborLoader(
(feature_store, graph_store),
num_neighbors=[-1, -1],
input_nodes=TensorAttr(group_name='paper', attr_name='x'),
time_attr='time',
batch_size=1,
batch_size=128,
)

num_iter = 10
for i, (batch1, batch2) in enumerate(zip(loader1, loader2)):
mask1 = batch1['paper'].time[0] >= batch1['paper'].time[1:]
mask2 = batch2['paper'].time[0] >= batch2['paper'].time[1:]
assert torch.all(mask1 & mask2)
if i > num_iter:
break
for batch1, batch2 in zip(loader1, loader2):
assert torch.equal(batch1['paper'].time, batch2['paper'].time)
13 changes: 13 additions & 0 deletions torch_geometric/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -874,10 +874,23 @@ def get_all_edge_attrs(self) -> List[EdgeAttr]:
in `Data` and their layouts."""
if not hasattr(self, '_edge_attrs'):
return []
added_attrs = set()

# Check edges added via _put_edge_index:
edge_attrs = self._edge_attrs.values()
for attr in edge_attrs:
attr.size = (self.num_nodes, self.num_nodes)
added_attrs.add(attr.layout)

# Check edges added through regular interface:
# TODO deprecate this and store edge attributes for all edges in
# EdgeStorage
for layout, attr_name in EDGE_LAYOUT_TO_ATTR_NAME.items():
if attr_name in self and layout not in added_attrs:
edge_attrs.append(
EdgeAttr(edge_type=None, layout=layout,
size=(self.num_nodes, self.num_nodes)))

return edge_attrs


Expand Down
17 changes: 17 additions & 0 deletions torch_geometric/data/hetero_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,13 +787,30 @@ def get_all_edge_attrs(self) -> List[EdgeAttr]:
r"""Returns a list of `EdgeAttr` objects corresponding to the edge
indices stored in `HeteroData` and their layouts."""
out = []
added_attrs = set()

# Check edges added via _put_edge_index:
for edge_type, _ in self.edge_items():
if not hasattr(self[edge_type], '_edge_attrs'):
continue
edge_attrs = self[edge_type]._edge_attrs.values()
for attr in edge_attrs:
attr.size = self[edge_type].size()
added_attrs.add((attr.edge_type, attr.layout))
out.extend(edge_attrs)

# Check edges added through regular interface:
# TODO deprecate this and store edge attributes for all edges in
# EdgeStorage
for edge_type, edge_store in self.edge_items():
for layout, attr_name in EDGE_LAYOUT_TO_ATTR_NAME.items():
# Don't double count:
if attr_name in edge_store and ((edge_type, layout)
not in added_attrs):
out.append(
EdgeAttr(edge_type=edge_type, layout=layout,
size=self[edge_type].size()))

return out


Expand Down
4 changes: 2 additions & 2 deletions torch_geometric/loader/neighbor_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ def __init__(
attr.index = None
time_tensors = feature_store.multi_get_tensor(time_attrs)
self.node_time_dict = {
mananshah99 marked this conversation as resolved.
Show resolved Hide resolved
time_attrs[i].group_name: time_tensors[i]
for i in range(len(time_tensors))
time_attr.group_name: time_tensor
for time_attr, time_tensor in zip(time_attrs, time_tensors)
}

# Obtain all node and edge metadata:
Expand Down