Skip to content

Commit 088173f

Browse files
committed
Map seed file when generating inputs
1 parent a320bb9 commit 088173f

File tree

6 files changed

+50
-48
lines changed

6 files changed

+50
-48
lines changed
Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
import os
22
import sys
3-
import string
43

54
# Read the node input file and translate the input IDs into a contiguous range.
65
# Then, read the relation input file and translate all source and destination node IDs
76
# to their updated contiguous values.
87

98
# User-provided input data directory
10-
if len(sys.argv) < 2 or os.path.exists(sys.argv[1]) == False:
9+
if len(sys.argv) < 2 or os.path.exists(sys.argv[1]) is False:
1110
print("Usage: generate_inputs.py [path_to_inputs]")
1211
exit(1)
1312

@@ -16,6 +15,7 @@
1615
# Input filenames
1716
nodefile = 'graph500-22_unique_node'
1817
relfile = 'graph500-22'
18+
seedfile = 'graph500-22-seed'
1919

2020
# Output data directory
2121
datadir = 'data'
@@ -26,30 +26,21 @@
2626
except OSError:
2727
pass
2828

29-
# Count the number of unique nodes in the data set
30-
num_nodes = sum(1 for line in open(os.path.join(inputdir, nodefile)))
31-
3229
updated_id = 0
3330

3431
updated_node_file = open(os.path.join(datadir, nodefile), 'w')
3532
updated_node_file.write('id\n') # Output a header row
3633
updated_relation_file = open(os.path.join(datadir, relfile), 'w')
37-
38-
# Scan the node file to find the highest node ID
39-
max_node = -1
40-
with open(os.path.join(inputdir, nodefile)) as f:
41-
for line in f:
42-
max_node = max(max_node, int(line))
34+
updated_seed_file = open(os.path.join(datadir, seedfile), 'w')
4335

4436
# Map every node ID to its line number
4537
# and generate an updated node file.
46-
placement = [0]*(max_node + 1)
38+
placement = {}
4739
with open(os.path.join(inputdir, nodefile)) as f:
4840
for line in f:
49-
node = int(line)
50-
placement[node] = updated_id
51-
updated_id += 1
41+
placement[int(line)] = updated_id
5242
updated_node_file.write('%d\n' % (updated_id))
43+
updated_id += 1
5344

5445
with open(os.path.join(inputdir, relfile)) as f:
5546
for line in f:
@@ -63,5 +54,10 @@
6354
# Output the updated edge description
6455
updated_relation_file.write("%d,%d\n" % (a, b))
6556

57+
with open(os.path.join(inputdir, seedfile)) as f:
58+
updated_seed_file.write(' '.join(str(placement[int(i)]) for i in f.read().split()))
59+
60+
6661
updated_node_file.close()
6762
updated_relation_file.close()
63+
updated_seed_file.close()
Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,21 @@
11
import os
22
import sys
3-
import string
43

54
# Read the node input file and translate the input IDs into a contiguous range.
65
# Then, read the relation input file and translate all source and destination node IDs
76
# to their updated contiguous values.
87

98
# User-provided input data directory
10-
if len(sys.argv) < 2 or os.path.exists(sys.argv[1]) == False:
9+
if len(sys.argv) < 2 or os.path.exists(sys.argv[1]) is False:
1110
print("Usage: generate_inputs.py [path_to_inputs]")
1211
exit(1)
1312

1413
inputdir = sys.argv[1]
1514

1615
# Input filenames
1716
nodefile = 'twitter_rv.net_unique_node'
18-
nodefile_out = 'twitter_rv_net_unique_node'
19-
relfile = 'twitter_rv'
17+
relfile = 'twitter_rv.net'
18+
seedfile = 'twitter_rv.net-seed'
2019

2120
# Output data directory
2221
datadir = 'data'
@@ -27,30 +26,21 @@
2726
except OSError:
2827
pass
2928

30-
# Count the number of unique nodes in the data set
31-
num_nodes = sum(1 for line in open(os.path.join(inputdir, nodefile)))
32-
3329
updated_id = 0
3430

35-
updated_node_file = open(os.path.join(datadir, nodefile_out), 'w')
31+
updated_node_file = open(os.path.join(datadir, nodefile.replace('.', '_')), 'w')
3632
updated_node_file.write('id\n') # Output a header row
37-
updated_relation_file = open(os.path.join(datadir, relfile), 'w')
38-
39-
# Scan the node file to find the highest node ID
40-
max_node = -1
41-
with open(os.path.join(inputdir, nodefile)) as f:
42-
for line in f:
43-
max_node = max(max_node, int(line))
33+
updated_relation_file = open(os.path.join(datadir, relfile.replace('.', '_')), 'w')
34+
updated_seed_file = open(os.path.join(datadir, seedfile.replace('.', '_')), 'w')
4435

4536
# Map every node ID to its line number
4637
# and generate an updated node file.
47-
placement = [0]*(max_node + 1)
38+
placement = {}
4839
with open(os.path.join(inputdir, nodefile)) as f:
4940
for line in f:
50-
node = int(line)
51-
placement[node] = updated_id
52-
updated_id += 1
41+
placement[int(line)] = updated_id
5342
updated_node_file.write('%d\n' % (updated_id))
43+
updated_id += 1
5444

5545
with open(os.path.join(inputdir, relfile)) as f:
5646
for line in f:
@@ -64,5 +54,10 @@
6454
# Output the updated edge description
6555
updated_relation_file.write("%d,%d\n" % (a, b))
6656

57+
with open(os.path.join(inputdir, seedfile)) as f:
58+
updated_seed_file.write(' '.join(str(placement[int(i)]) for i in f.read().split()))
59+
60+
6761
updated_node_file.close()
6862
updated_relation_file.close()
63+
updated_seed_file.close()

benchmark/redisgraph/kn.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
import sys
1111
import os
12-
import threading
1312
import click
1413
import multiprocessing
1514
from query_runner import *
@@ -18,6 +17,7 @@
1817
# Global, map of reports.
1918
seedReports = {}
2019

20+
2121
#####################################################################
2222
# Initialize seed reporting,
2323
# seedReports[seed][iterations] contains the number of iterations required for seed
@@ -28,10 +28,11 @@ def InitSeedReports(seeds, iterations):
2828
for s in seeds:
2929
seedReports[s] = []
3030

31+
3132
#####################################################################
3233
# Generate a report summary.
3334
#######################################################################
34-
def FinalizeReport(depth, threads):
35+
def FinalizeReport(graphid, depth, threads):
3536
global seedReports
3637
# seed=19284, k=1, runId=0, avgNeighbor=91.0, execTime=0.197093009949
3738
# AVG Seed iterations.
@@ -42,14 +43,21 @@ def FinalizeReport(depth, threads):
4243
threadsTotalRuntime = [0] * threads
4344
runs = 0
4445

46+
# map to raw seed id
47+
raw_seeds = []
48+
if os.path.exists(graphid + '_unique_node'):
49+
for line in open(graphid + '_unique_node'):
50+
raw_seeds.append(line.strip())
51+
4552
for seed in seedReports:
53+
seed_raw = raw_seeds[int(seed)]
4654
report = seedReports[seed]
4755
for iterationReport in report:
4856
avgNeighbor = iterationReport['avgN']
4957
execTime = iterationReport['totalTime']
5058
threadId = iterationReport['threadId']
5159
threadsTotalRuntime[threadId] += execTime
52-
output += "seed=%s, k=%d, avgNeighbor=%d, execTime=%f[ms]\r\n" %(seed, depth, avgNeighbor, execTime)
60+
output += "seed=%s, k=%d, avgNeighbor=%d, execTime=%f[ms]\r\n" %(seed_raw, depth, avgNeighbor, execTime)
5361
output += "**************************************************************\r\n"
5462

5563
avgKNSize += avgNeighbor
@@ -69,6 +77,7 @@ def FinalizeReport(depth, threads):
6977

7078
return output
7179

80+
7281
#####################################################################
7382
# K-hop-path-neighbor-count benchmark workload.
7483
# (1) read prepared random nodes from a seed file under seed folder.
@@ -87,6 +96,7 @@ def GetSeeds(seed_file_path, count):
8796
print("Seed file does not contain enough seeds.")
8897
sys.exit()
8998

99+
90100
###############################################################
91101
# function: thread worker, pull work item from pool
92102
# and execute query via runner
@@ -97,7 +107,7 @@ def RunKNLatencyThread(graphid, threadId, depth, provider, label, seedPool, repo
97107
elif provider == "tigergraph":
98108
runner = TigerGraphQueryRunner()
99109
else:
100-
print "Unknown runner %s, quiting" % provider
110+
print("Unknown runner %s, quiting" % provider)
101111
sys.exit()
102112

103113
# As long as there's work to do...
@@ -132,23 +142,24 @@ def RunKNLatencyThread(graphid, threadId, depth, provider, label, seedPool, repo
132142
iterationSummary['totalTime'] = iterationTime
133143
reportQueue.put(iterationSummary, False)
134144

145+
135146
###############################################################
136147
# function: check the total latency for k-hop-path neighbor count
137148
# query for a given set of seeds.
138149
################################################################
139150
@click.command()
140-
@click.option('--graphid', '-g', default='graph500', help="graph id")
141-
@click.option('--seedfile', '-s', default='./seeds', help="seed file")
151+
@click.option('--graphid', '-g', default='graph500-22',
152+
type=click.Choice(['graph500-22', 'twitter_rv_net']), help="graph id")
142153
@click.option('--count', '-c', default=20, help="number of seeds")
143154
@click.option('--depth', '-d', default=1, help="number of hops to perform")
144155
@click.option('--provider', '-p', default='redisgraph', help="graph identifier")
145156
@click.option('--label', '-l', default='label', help="node label")
146157
@click.option('--threads', '-t', default=2, help="number of querying threads")
147158
@click.option('--iterations', '-i', default=10, help="number of iterations per query")
148-
def RunKNLatency(graphid, seedfile, count, depth, provider, label, threads, iterations):
159+
def RunKNLatency(graphid, count, depth, provider, label, threads, iterations):
149160
#create result folder
150161
global seedReports
151-
162+
seedfile = os.path.join('data', graphid + '-seed')
152163
seeds = GetSeeds(seedfile, count)
153164

154165
# Create a pool of seeds.
@@ -188,7 +199,7 @@ def RunKNLatency(graphid, seedfile, count, depth, provider, label, threads, iter
188199
seedReports[seed].append({'avgN': avgN, 'totalTime': totalTime, 'threadId': threadId})
189200

190201
print("Finalizing report")
191-
output = FinalizeReport(depth, threads)
202+
output = FinalizeReport(graphid, depth, threads)
192203
dirName = "./result_" + provider +"/"
193204
fileName = "KN-latency-k%d-threads%d-iter%d" %(depth, threads, iterations)
194205
outputPath = os.path.join(dirName, fileName)

benchmark/redisgraph/query_runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def KN(self, root, depth):
4545
query = "MATCH (s:%s)-[*%d]->(t) WHERE s.id=%d RETURN count(t)" % (self.label, int(depth), int(root))
4646
result = self.driver.execute_command('graph.query', self.graphid, query)
4747
except Exception as e: # timeout, we return -1, reset session
48-
print "Exception: %s" % e
48+
print("Exception: %s" % e)
4949
raise e
5050
return -1
5151
else:

benchmark/redisgraph/redisgraph_load_graph500.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ fi
77
python generate_graph500_inputs.py $2 || exit 1
88

99
# Run RedisGraph bulk import script
10-
python $1/demo/bulk_insert/bulk_insert.py graph500 -n data/graph500-22_unique_node -r data/graph500-22 || exit 1
10+
python $1/demo/bulk_insert/bulk_insert.py graph500-22 -n data/graph500-22_unique_node -r data/graph500-22 || exit 1
1111

1212
# Create index on node ID property
13-
~/redis/src/redis-cli GRAPH.QUERY graph500 "create index on :graph500-22_unique_node(id)"
13+
~/redis/src/redis-cli GRAPH.QUERY graph500-22 "create index on :graph500-22_unique_node(id)"

benchmark/redisgraph/redisgraph_load_twitter.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ fi
77
python generate_twitter_inputs.py $2 || exit 1
88

99
# Run RedisGraph bulk import script
10-
python $1/demo/bulk_insert/bulk_insert.py twitter_rv -n data/twitter_rv_net_unique_node -r data/twitter_rv || exit 1
10+
python $1/demo/bulk_insert/bulk_insert.py twitter_rv_net -n data/twitter_rv_net_unique_node -r data/twitter_rv_net || exit 1
1111

1212
# Create index on node ID property
13-
~/redis/src/redis-cli GRAPH.QUERY twitter_rv "create index on :twitter_rv_net_unique_node(id)"
13+
~/redis/src/redis-cli GRAPH.QUERY twitter_rv_net "create index on :twitter_rv_net_unique_node(id)"

0 commit comments

Comments (0)