Add Operators, models and node cache

alibaba · Seventeen17 · Apr 24, 2023 · Apr 10, 2023 · Apr 12, 2023 · Apr 12, 2023
commit ef3ad92f6f76416f52357e23868a3eddfefce967
diff --git a/graphlearn/examples/basic/test_local_temporal_loader.py b/graphlearn/examples/basic/test_local_temporal_loader.py
@@ -0,0 +1,55 @@
+# Copyright 2023 Alibaba Group Holding Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+from tqdm import tqdm
+
+import graphlearn as gl
+import graphlearn.python.nn.pytorch as thg
+
+
+def main(argv):  # Smoke test: stream "interaction" edge events through thg.TemporalDataLoader; argv is unused.
+  cur_path = sys.path[0]  # data files are resolved relative to the first sys.path entry
+  gl.set_tape_capacity(1)
+
+  g = gl.Graph()  # weighted "user" nodes plus weighted, timestamped user->user "interaction" edges
+  g.node(os.path.join(cur_path, "data/user"),
+         node_type="user", decoder=gl.Decoder(weighted=True)) \
+    .edge(os.path.join(cur_path, "data/u-u"),
+          edge_type=("user", "user", "interaction"),
+          decoder=gl.Decoder(weighted=True, timestamped=True))
+  g.init()
+
+  query = g.E("interaction").batch(1).alias("event").values()  # edge query with batch(1), aliased "event"
+
+  ds = thg.TemporalDataset(query, 1, event_name="event")  # event_name matches the alias given above
+  dl = thg.TemporalDataLoader(ds)
+
+  for idx, data in tqdm(enumerate(dl)):  # print each loaded item and its src/dst/msg/t fields
+    print("res:", idx, data)
+    print(data.src)
+    print(data.dst)
+    print(data.msg)
+    print(data.t)
+
+  g.close()
+
+
+if __name__ == "__main__":  # run only when executed as a script
+  main(sys.argv[1:])  # pass the command-line arguments without the program name
diff --git a/graphlearn/examples/basic/test_local_temporal_sampler.py b/graphlearn/examples/basic/test_local_temporal_sampler.py
@@ -0,0 +1,62 @@
+# Copyright 2023 Alibaba Group Holding Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+
+import graphlearn as gl
+
+def main(argv):  # Smoke test: sample neighbors around batches of timestamped "interaction" edges; argv is unused.
+  cur_path = sys.path[0]  # data files are resolved relative to the first sys.path entry
+  gl.set_tape_capacity(1)
+  gl.set_padding_mode(0)
+  gl.set_default_neighbor_id(-1)  # NOTE(review): presumably the id reported for missing neighbors - confirm
+
+  g = gl.Graph()  # weighted "user" nodes plus weighted, timestamped user->user "interaction" edges
+  g.node(os.path.join(cur_path, "data/user"),
+         node_type="user", decoder=gl.Decoder(weighted=True)) \
+    .edge(os.path.join(cur_path, "data/u-u"),
+          edge_type=("user", "user", "interaction"),
+          decoder=gl.Decoder(weighted=True, timestamped=True))
+  g.init()
+  # Query: edges in batch(2); both endpoints; one "random" negative per source; sample(3) "topk" out-edges each.
+  events = g.E("interaction").batch(2).alias("event")
+  srcV = events.outV().alias('src')
+  dstV = events.inV().alias('pos_dst')
+  negV = srcV.outNeg("interaction").sample(1).by("random").alias("neg_dst")
+  srcV_nbr = srcV.outE("interaction").sample(3).by("topk").alias("src_nbr")
+  dstV_nbr = dstV.outE("interaction").sample(3).by("topk").alias("dst_nbr")
+  negV_nbr = negV.outE("interaction").sample(3).by("topk").alias("neg_nbr")
+  # The *_nbr locals are never read again; results are fetched by alias, and only "event"/"dst_nbr" are printed.
+  query = events.values()
+
+  ds = gl.Dataset(query, 1)
+
+  while True:  # drain the dataset; gl.OutOfRangeError ends the loop
+    try:
+      event = ds.next()
+      print(event["event"].src_ids, event["event"].dst_ids, event["event"].timestamps)
+      print(event["dst_nbr"].dst_ids, event["dst_nbr"].timestamps)
+    except gl.OutOfRangeError:
+      break
+
+  g.close()
+
+
+if __name__ == "__main__":  # run only when executed as a script
+  main(sys.argv[1:])  # pass the command-line arguments without the program name
diff --git a/graphlearn/examples/basic/test_subgraph.py b/graphlearn/examples/basic/test_subgraph.py
@@ -0,0 +1,68 @@
+# Copyright 2023 Alibaba Group Holding Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import getopt
+import os
+import sys
+
+import graphlearn as gl
+from query_examples import *
+
+
+def main(argv):  # Smoke test: subgraph sampling over "relation" edges, via samplers and via a query; argv is unused.
+  cur_path = sys.path[0]  # data files are resolved relative to the first sys.path entry
+  g = gl.Graph()  # "user" and "entity" nodes, plus undirected entity-entity "relation" edges
+  g.node(os.path.join(cur_path, "data/user"),
+         node_type="user", decoder=gl.Decoder(weighted=True)) \
+    .node(os.path.join(cur_path, "data/entity"),
+          node_type="entity", decoder=gl.Decoder(attr_types=['float', 'float', 'float', 'float'], labeled=True)) \
+    .edge(os.path.join(cur_path, "data/relation"),
+          edge_type=("entity", "entity", "relation"), decoder=gl.Decoder(weighted=True), directed=False)
+  # Part 1: drive the samplers directly.
+  g.init()
+  num_nbrs = [10, 10]
+  edge_sampler = g.edge_sampler('relation', batch_size=1, strategy="by_order") # batch size must be 1.
+  node_sampler = g.node_sampler('entity', batch_size=10, strategy="by_order")
+  sampler = g.subgraph_sampler('relation', num_nbrs, need_dist=True) # random_edge, in_order_edge
+  edges = edge_sampler.get()
+  nodes = node_sampler.get()  # only referenced by the commented-out alternative on the next line
+  # Alternative seeding by node ids: subgraph = sampler.get(nodes.ids)
+  subgraph = sampler.get(edges.src_ids, edges.dst_ids)
+  print('edge_index: ', subgraph.edge_index)
+  print('nodes: ', subgraph.nodes.ids)
+  print('edges: ', subgraph.edges.edge_ids)
+  print("dist_to_src :", subgraph.dist_to_src)
+  print("dist_to_dst :", subgraph.dist_to_dst)
+
+  num_nbrs=[2]
+  # Part 2: test GSL - the subgraph is requested through a query and read back under the alias 'sub'.
+  query = g.E('relation').batch(1).shuffle().alias('relation').SubGraph('relation', num_nbrs, need_dist=True).alias('sub').values()
+  # Alternative node-seeded query: g.V('entity').shuffle().batch(128).SubGraph('relation', num_nbrs, need_dist=False).alias('sub').values()
+  ds = gl.Dataset(query)
+  subgraph = ds.next()['sub']  # only the first batch is inspected
+  print('edge_index: ', subgraph.edge_index)
+  print('nodes: ', subgraph.nodes.ids)
+  print('nodes.float_attrs: ', subgraph.nodes.float_attrs)
+  print('edges: ', subgraph.edges.edge_ids)
+  print("dist_to_src :", subgraph.dist_to_src)
+  print("dist_to_dst :", subgraph.dist_to_dst)
+  g.close()
+
+
+if __name__ == "__main__":  # run only when executed as a script
+  main(sys.argv[1:])  # pass the command-line arguments without the program name
diff --git a/graphlearn/examples/data/gen_temporal_data.py b/graphlearn/examples/data/gen_temporal_data.py
@@ -0,0 +1,53 @@
+# Copyright 2023 Alibaba Group Holding Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""Generate graph data with timestamps
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+cur_path = sys.path[0]  # base directory for the generated files (first sys.path entry, normally the script's directory)
+
+def gen_files():
+    """Write a small random temporal graph under cur_path/data (the directory must already exist).
+
+    data/user gets 20 weighted nodes; data/u-u gets 5 weighted, timestamped edges per node.
+    """
+    import random
+
+    u_count = 20
+    fanout = 5
+
+    with open(os.path.join(cur_path, "data/user"), 'w') as f:
+        f.write('id:int64\tweight:float\n')
+        for i in range(u_count):
+            f.write('%d\t%f\n' % (i, i / 10.0))
+
+    with open(os.path.join(cur_path, "data/u-u"), 'w') as f:
+        f.write('src_id:int64\tdst_id:int64\tweight:float\ttimestamp:int64\n')
+        # Timestamps are a shuffled permutation of 0..u_count*fanout-1, so every edge gets a unique one.
+        ts = list(range(u_count * fanout))
+        random.shuffle(ts)
+        print(ts)
+        for i in range(u_count):
+            for j in range(fanout):
+                # Destinations are drawn uniformly, so self-loops and repeated edges can occur.
+                dst = random.randint(0, u_count - 1)
+                f.write('%d\t%d\t%f\t%d\n' % (i, dst, (i + dst) * 0.1, ts[i * fanout + j]))
+
+gen_files()  # NOTE: no __main__ guard, so the files are also written whenever this module is imported
diff --git a/graphlearn/examples/data/jodie.py b/graphlearn/examples/data/jodie.py
@@ -0,0 +1,86 @@
+# Copyright 2023 Alibaba Group Holding Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+""" Processor of dynamic graph data Jodie-Wikipedia
+from http://snap.stanford.edu/jodie
+"""
+import os
+import pandas as pd
+import urllib.request
+
+# Create the output directory directly instead of shelling out to `mkdir -p`.
+os.makedirs("jodie", exist_ok=True)
+path = "jodie/wikipedia.csv"
+url = 'http://snap.stanford.edu/jodie/wikipedia.csv'
+
+# Stream the download to disk in 10 MiB chunks; both handles are closed even on error.
+with urllib.request.urlopen(url) as data, open(path, 'wb') as f:
+  while True:
+    chunk = data.read(10 * 1024 * 1024)
+    if not chunk:
+      break
+    f.write(chunk)
+
+# Skip the first row and address columns by position.
+df = pd.read_csv(path, skiprows=1, header=None)
+src = df.iloc[:, 0].values  # column 0: written as the edge source id
+dst = df.iloc[:, 1].values  # column 1: written (with an offset) as the edge destination id
+t = df.iloc[:, 2].values  # column 2: written as the edge timestamp
+y = df.iloc[:, 3].values  # column 3: not used by the code visible in this file
+msg = df.iloc[:, 4:].values  # columns 4 onward: written as the edge attribute string
+
+# train:val:test = 0.7:0.15:0.15
+val_idx = 110232  # first row of the validation split
+test_idx = 133853  # first row after the validation split
+max_src_id = 8226  # largest source node id; destination ids are offset by max_src_id + 1
+max_dst_id = 9227  # exclusive upper bound of the destination node ids written by gen_nodes
+
+def gen_edges(path, start=0, end=None):
+  """Write edge rows [start:end] to `path` as a TSV table; end=None (the default) runs through the last row."""
+  with open(path, 'w') as f:
+    f.write("sid:int64\tdid:int64\ttimestamp:int64\tattrs:string\n")
+    for sid, did, ts, attr in zip(
+        src[start:end], dst[start:end], t[start:end], msg[start:end]):
+      attrs = ':'.join(str(x) for x in attr.tolist())  # feature values joined with ':'
+      # Destination ids are shifted by max_src_id + 1 before being written.
+      f.write("{}\t{}\t{}\t{}\n".format(sid, did + max_src_id + 1, int(ts), attrs))
+
+def gen_nodes():
+  """Write the id-only node tables jodie/src and jodie/dst."""
+  # Source ids are 0..max_src_id; destination ids continue from there up to max_dst_id - 1.
+  id_ranges = (("jodie/src", range(max_src_id + 1)),
+               ("jodie/dst", range(max_src_id + 1, max_dst_id)))
+  for table_path, node_ids in id_ranges:
+    with open(table_path, 'w') as out:
+      out.write("id:int64\n")
+      # One id per line, streamed through writelines.
+      out.writelines(str(node_id) + "\n" for node_id in node_ids)
+
+def gen_nodes_withid():
+  """Write jodie/src_feat and jodie/dst_feat, where each node's attribute is its own id."""
+  header = "id:int64\tattribute:string\n"
+  def dump(table_path, node_ids):
+    with open(table_path, 'w') as out:
+      out.write(header)
+      out.writelines("{}\t{}\n".format(nid, nid) for nid in node_ids)
+
+  dump("jodie/src_feat", range(max_src_id + 1))
+  dump("jodie/dst_feat", range(max_src_id + 1, max_dst_id))
+
+gen_edges("jodie/wikipedia")  # full edge list (default start/end)
+gen_edges("jodie/wikipedia_train", end=val_idx)  # rows before val_idx
+gen_edges("jodie/wikipedia_val", start=val_idx, end=test_idx)  # rows from val_idx up to test_idx
+gen_edges("jodie/wikipedia_test", start=test_idx)  # rows from test_idx on (was val_idx, which overlapped the val split)
+gen_nodes()  # id-only node tables
+gen_nodes_withid()  # node tables with an attribute column