Skip to content

Updated geo benchmarks to fully saturate one primary. Include zset benchmarks with long score #214

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
446 changes: 280 additions & 166 deletions commands-priority.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions redis_benchmarks_specification/__cli__/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def spec_cli_args(parser):
help="Include modules statistics on commandstats.",
)
parser.add_argument("--summary-csv", type=str, default="")
parser.add_argument("--group-csv", type=str, default="")
parser.add_argument("--commands-json-file", type=str, default="./commands.json")
parser.add_argument(
"--commands-priority-file", type=str, default="./commands-priority.json"
Expand Down
91 changes: 89 additions & 2 deletions redis_benchmarks_specification/__cli__/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import redis
import oyaml as yaml
import csv

from redis_benchmarks_specification.__common__.runner import get_benchmark_specs

Expand Down Expand Up @@ -224,6 +225,24 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
yaml.dump(benchmark_config, file, sort_keys=False, width=100000)
total_tracked_commands_pct = "n/a"

module_names = {
"ft": "redisearch",
"search": "redisearch",
"_ft": "redisearch",
"graph": "redisgraph",
"ts": "redistimeseries",
"timeseries": "redistimeseries",
"json": "redisjson",
"bf": "redisbloom",
"cf": "redisbloom",
"topk": "redisbloom",
"cms": "redisbloom",
"tdigest": "redisbloom",
}

group_usage_calls = {}
group_usage_usecs = {}

if args.commandstats_csv != "":
logging.info(
"Reading commandstats csv {} to determine commands/test coverage".format(
Expand All @@ -234,6 +253,7 @@ def generate_stats_cli_command_logic(args, project_name, project_version):

rows = []
priority = {}
priority_usecs = {}

# open file in read mode
total_count = 0
Expand All @@ -246,10 +266,13 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
csv_reader = reader(x.replace("\0", "") for x in read_obj)
# Iterate over each row in the csv using reader object
for row in csv_reader:
if len(row) == 0:
if len(row) <= 2:
continue
# row variable is a list that represents a row in csv
cmdstat = row[0]
cmdstat = cmdstat.lower()
if "cmdstat_" not in cmdstat:
continue
cmdstat = cmdstat.replace("cmdstat_", "")
count = int(row[1])
usecs = None
Expand All @@ -265,6 +288,15 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
deprecated = False
if "." in cmdstat:
module = True
cmd_module_prefix = cmdstat.split(".")[0]
if cmd_module_prefix in module_names:
group = module_names[cmd_module_prefix]
else:
logging.error(
"command with a module prefix does not have module name {}".format(
cmd_module_prefix
)
)
if cmd in commands_json:
command_json = commands_json[cmd]
group = command_json["group"]
Expand All @@ -273,14 +305,35 @@ def generate_stats_cli_command_logic(args, project_name, project_version):

if module is False or include_modules:
priority[cmd.lower()] = count
if type(usecs) == int:
priority_usecs[cmd.lower()] = usecs

if cmdstat in tracked_commands_json:
tracked = True
if module is False or include_modules:
row = [cmdstat, group, count, usecs, tracked, deprecated]
rows.append(row)
if group not in group_usage_calls:
group_usage_calls[group] = {}
group_usage_calls[group]["call"] = 0
if group not in group_usage_usecs:
group_usage_usecs[group] = {}
group_usage_usecs[group]["usecs"] = 0
if type(count) == int:
group_usage_calls[group]["call"] = (
group_usage_calls[group]["call"] + count
)
if type(usecs) == int:
group_usage_usecs[group]["usecs"] = (
group_usage_usecs[group]["usecs"] + usecs
)
if group == "n/a":
logging.warn("Unable to detect group in {}".format(cmd))

priority_list = sorted(((priority[cmd], cmd) for cmd in priority), reverse=True)
priority_list_usecs = sorted(
((priority_usecs[cmd], cmd) for cmd in priority_usecs), reverse=True
)

priority_json = {}
top_10_missing = []
Expand All @@ -291,6 +344,16 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
count = x[0]
total_count += count

for group_name, group in group_usage_calls.items():
call = group["call"]
pct = call / total_count
group["pct"] = pct

for group_name, group in group_usage_usecs.items():
usecs = group["usecs"]
pct = usecs / total_usecs
group["pct"] = pct

for pos, x in enumerate(priority_list, 1):
count = x[0]
cmd = x[1]
Expand All @@ -315,6 +378,31 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
)
json.dump(priority_json, fd, indent=True)

if args.group_csv != "":
header = [
"group",
"count",
"usecs",
"usec_per_call",
"% count",
"% usecs",
]
with open(args.group_csv, "w", encoding="UTF8", newline="") as f:
writer = csv.writer(f)

# write the header
writer.writerow(header)
for group_name, group_usage_info in group_usage_calls.items():
count = group_usage_info["call"]
call_pct = group_usage_info["pct"]
usecs = group_usage_usecs[group_name]["usecs"]
usecs_pct = group_usage_usecs[group_name]["pct"]
usecs_per_call = usecs / count

writer.writerow(
[group_name, count, usecs, usecs_per_call, call_pct, usecs_pct]
)

if args.summary_csv != "":
header = [
"command",
Expand All @@ -326,7 +414,6 @@ def generate_stats_cli_command_logic(args, project_name, project_version):
"% count",
"% usecs",
]
import csv

with open(args.summary_csv, "w", encoding="UTF8", newline="") as f:
writer = csv.writer(f)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dbconfig:
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
resources:
requests:
memory: 1g
memory: 6g
tested-groups:
- geo
tested-commands:
Expand All @@ -21,7 +21,7 @@ build-variants:
clientconfig:
run_image: redislabs/memtier_benchmark:edge
tool: memtier_benchmark
arguments: --pipeline 10 -c 2 -t 2 --command="GEODIST key 1 2" --hide-histogram --test-time 180
arguments: --pipeline 10 -c 50 -t 4 --command="GEODIST key 1 2" --hide-histogram --test-time 180
resources:
requests:
cpus: '4'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dbconfig:
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
resources:
requests:
memory: 1g
memory: 6g
tested-groups:
- geo
tested-commands:
Expand All @@ -21,7 +21,7 @@ build-variants:
clientconfig:
run_image: redislabs/memtier_benchmark:edge
tool: memtier_benchmark
arguments: -c 2 -t 2 --command="GEODIST key 1 2" --hide-histogram --test-time 180
arguments: -c 50 -t 4 --command="GEODIST key 1 2" --hide-histogram --test-time 180
resources:
requests:
cpus: '4'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dbconfig:
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
resources:
requests:
memory: 1g
memory: 6g
tested-groups:
- geo
tested-commands:
Expand All @@ -21,7 +21,7 @@ build-variants:
clientconfig:
run_image: redislabs/memtier_benchmark:edge
tool: memtier_benchmark
arguments: --pipeline 10 -c 2 -t 2 --command="GEOHASH key 1" --hide-histogram --test-time 180
arguments: --pipeline 10 -c 50 -t 4 --command="GEOHASH key 1" --hide-histogram --test-time 180
resources:
requests:
cpus: '4'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dbconfig:
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
resources:
requests:
memory: 1g
memory: 6g
tested-groups:
- geo
tested-commands:
Expand All @@ -21,7 +21,7 @@ build-variants:
clientconfig:
run_image: redislabs/memtier_benchmark:edge
tool: memtier_benchmark
arguments: -c 2 -t 2 --command="GEOHASH key 1" --hide-histogram --test-time 180
arguments: -c 50 -t 4 --command="GEOHASH key 1" --hide-histogram --test-time 180
resources:
requests:
cpus: '4'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dbconfig:
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
resources:
requests:
memory: 1g
memory: 6g
tested-groups:
- geo
tested-commands:
Expand All @@ -21,7 +21,7 @@ build-variants:
clientconfig:
run_image: redislabs/memtier_benchmark:edge
tool: memtier_benchmark
arguments: --pipeline 10 -c 2 -t 2 --command="GEOPOS key 1" --hide-histogram --test-time 180
arguments: --pipeline 10 -c 50 -t 4 --command="GEOPOS key 1" --hide-histogram --test-time 180
resources:
requests:
cpus: '4'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dbconfig:
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
resources:
requests:
memory: 1g
memory: 6g
tested-groups:
- geo
tested-commands:
Expand All @@ -21,7 +21,7 @@ build-variants:
clientconfig:
run_image: redislabs/memtier_benchmark:edge
tool: memtier_benchmark
arguments: -c 2 -t 2 --command="GEOPOS key 1" --hide-histogram --test-time 180
arguments: -c 50 -t 4 --command="GEOPOS key 1" --hide-histogram --test-time 180
resources:
requests:
cpus: '4'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dbconfig:
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
resources:
requests:
memory: 1g
memory: 6g
tested-groups:
- geo
tested-commands:
Expand All @@ -21,7 +21,7 @@ build-variants:
clientconfig:
run_image: redislabs/memtier_benchmark:edge
tool: memtier_benchmark
arguments: -c 2 -t 2 --command="GEOSEARCH key FROMLONLAT 7.0 55.0 BYBOX 200 200 KM" --hide-histogram --test-time 180
arguments: -c 2 -t 4 --command="GEOSEARCH key FROMLONLAT 7.0 55.0 BYBOX 200 200 KM" --hide-histogram --test-time 180
resources:
requests:
cpus: '4'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dbconfig:
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
resources:
requests:
memory: 1g
memory: 6g
tested-groups:
- geo
tested-commands:
Expand All @@ -21,7 +21,7 @@ build-variants:
clientconfig:
run_image: redislabs/memtier_benchmark:edge
tool: memtier_benchmark
arguments: --pipeline 10 -c 2 -t 2 --command="GEOSEARCH key FROMLONLAT 7.0 55.0 BYRADIUS 200 KM" --hide-histogram --test-time 180
arguments: --pipeline 10 -c 2 -t 4 --command="GEOSEARCH key FROMLONLAT 7.0 55.0 BYRADIUS 200 KM" --hide-histogram --test-time 180
resources:
requests:
cpus: '4'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dbconfig:
dataset: https://s3.us-east-2.amazonaws.com/redis.benchmarks.spec/datasets/geopoint/dump.rdb
resources:
requests:
memory: 1g
memory: 6g
tested-groups:
- geo
tested-commands:
Expand All @@ -21,7 +21,7 @@ build-variants:
clientconfig:
run_image: redislabs/memtier_benchmark:edge
tool: memtier_benchmark
arguments: -c 2 -t 2 --command="GEOSEARCH key FROMLONLAT 7.0 55.0 BYRADIUS 200 KM" --hide-histogram --test-time 180
arguments: -c 2 -t 4 --command="GEOSEARCH key FROMLONLAT 7.0 55.0 BYRADIUS 200 KM" --hide-histogram --test-time 180
resources:
requests:
cpus: '4'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Benchmark spec: ZRANGE ... BYSCORE WITHSCORES against a single sorted-set key
# holding 10 members whose scores are large integers (10000000..10000009).
# NOTE(review): nesting indentation is not visible in this view of the file;
# lines are reproduced exactly as shown.
version: 0.4
name: memtier_benchmark-1key-zset-10-elements-zrange-all-elements-long-scores
description: 'Runs memtier_benchmark, for a keyspace length of 1 SORTED SET key. The SORTED SET contains 10 elements in it and we query it using ZRANGE BYSCORE with a range of all elements. The scores are long numbers.'
dbconfig:
configuration-parameters:
save: '""'
check:
# Exactly one key is expected once init_commands has run.
keyspacelen: 1
resources:
requests:
memory: 1g
init_commands:
# Seeds key zset:10:long_score with 10 members scored 10000000 through 10000009.
- '"ZADD" "zset:10:long_score" "10000000" "lysbgqqfqw" "10000001" "mtccjerdon" "10000002" "jekkafodvk" "10000003" "nmgxcctxpn" "10000004" "vyqqkuszzh" "10000005" "pytrnqdhvs" "10000006" "oguwnmniig" "10000007" "gekntrykfh" "10000008" "nhfnbxqgol" "10000009" "cgoeihlnei"'
tested-groups:
- sorted-set
tested-commands:
- zrange
redis-topologies:
- oss-standalone
build-variants:
- gcc:8.5.0-amd64-debian-buster-default
clientconfig:
run_image: redislabs/memtier_benchmark:edge
tool: memtier_benchmark
# The score window 0..1000000000 contains every seeded score, so each request
# selects all 10 members of the key created above.
arguments: --command="ZRANGE zset:10:long_score 0 1000000000 BYSCORE WITHSCORES" --hide-histogram --test-time 180
resources:
requests:
cpus: '4'
memory: 2g

priority: 53
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Benchmark spec: ZRANGEBYSCORE ... WITHSCORES against a single sorted-set key
# holding 100 members whose scores are large integers (10000000..10000099).
#
# Fixes relative to the previous revision of this spec:
#   * init_commands seeded "zset:10:long_score" with only 10 members while the
#     benchmark queried "zset:100:long_score", a key that was never created.
#   * the score window was "0 1", which cannot match scores >= 10000000, so
#     every request returned an empty reply instead of "all elements".
# The first 10 members are the original ones; the remaining 90 are synthetic
# 10-character members added so the data set matches the spec name.
version: 0.4
name: memtier_benchmark-1key-zset-100-elements-zrangebyscore-all-elements-long-scores
description: 'Runs memtier_benchmark, for a keyspace length of 1 SORTED SET key. The SORTED SET contains 100 elements in it and we query it using ZRANGEBYSCORE with a range of all elements. The scores are long numbers.'
dbconfig:
  configuration-parameters:
    save: '""'
  check:
    # Exactly one key is expected once init_commands has run.
    keyspacelen: 1
  resources:
    requests:
      memory: 1g
  init_commands:
  # Key name must match the one used in clientconfig.arguments below.
  - '"ZADD" "zset:100:long_score" "10000000" "lysbgqqfqw" "10000001" "mtccjerdon" "10000002" "jekkafodvk" "10000003" "nmgxcctxpn" "10000004" "vyqqkuszzh" "10000005" "pytrnqdhvs" "10000006" "oguwnmniig" "10000007" "gekntrykfh" "10000008" "nhfnbxqgol" "10000009" "cgoeihlnei" "10000010" "qwmzkfhtab" "10000011" "brxpdnuvoe" "10000012" "tjhcaylwsg" "10000013" "zkefmoqird" "10000014" "uvnsxbgpch" "10000015" "hdawjrtkly" "10000016" "cpoimzefqx" "10000017" "wlgtbvsnuj" "10000018" "eyrkhqdamc" "10000019" "ifzxopwlbt" "10000020" "sanvgjcyek" "10000021" "mqudhrixfo" "10000022" "xbtlwzpkgs" "10000023" "ojceyvnahm" "10000024" "kgrfqudtiw" "10000025" "dzhsmblxop" "10000026" "yvpkajecnr" "10000027" "ftiwogqzum" "10000028" "lxbdrshyvc" "10000029" "aenjtkpfwg" "10000030" "rmoqzcluix" "10000031" "gwhybdvtsa" "10000032" "pkfenjorqm" "10000033" "vuczixglhw" "10000034" "nsdtpmaybk" "10000035" "jqeorvfcuz" "10000036" "bhlgxwnids" "10000037" "tykmajpeqo" "10000038" "zcfrvuhbtl" "10000039" "wigxsndkym" "10000040" "eoapqzcjrf" "10000041" "ulvthbwgke" "10000042" "hxnmdiyosp" "10000043" "qjzcfuakrv" "10000044" "cbwelgtxnh" "10000045" "modsyqpifj" "10000046" "xrukzvhacw" "10000047" "sgbtnelmdy" "10000048" "ipfjoxqwzr" "10000049" "khaycugvst" "10000050" "dnlbmrekio" "10000051" "yzqxfwjphd" "10000052" "ovgtascunm" "10000053" "fekiylbrzx" "10000054" "lwjdpoqhgt" "10000055" "acsvnmufey" "10000056" "rbxzkiwjql" "10000057" "gtohdeypca" "10000058" "pumfrsvnkb" "10000059" "vzilwgxtjd" "10000060" "nqyacohefs" "10000061" "jkrbupmzvi" "10000062" "bdgxtlnwoq" "10000063" "thesjfykra" "10000064" "zmwcqiduxg" "10000065" "wpnvohbelk" "10000066" "efutzrgsmj" "10000067" "uyakxdcqpw" "10000068" "hlibnvjfto" "10000069" "qsozmeywhc" "10000070" "cxdgrkapun" "10000071" "mjtfwlsibz" "10000072" "xevqhyonrd" "10000073" "sbkpcuzgam" "10000074" "iwrjdtxfle" "10000075" "kfyonsmqvh" "10000076" "dghzbepcwi" "10000077" "yturlkajxo" "10000078" "onxmvfdsqb" "10000079" "fcjwigzyte" "10000080" "lraephkoun" "10000081" "aqsdyxbmlv" "10000082" "rizgtocwfk" "10000083" "gvnhujrpes" "10000084" "pokbqmwaxd" "10000085" "vdlyfczitg" "10000086" "nwexsakjho" "10000087" "jhpturvbqy" "10000088" "bycogilnzm" "10000089" "tgmkdwfsec" "10000090" "zuaqnxphrj" "10000091" "wsfivbtoyl" "10000092" "ekhjzmgcud" "10000093" "ubrpolaxwn" "10000094" "hqvtyesikf" "10000095" "qndcgwjmzb" "10000096" "czxlkfurap" "10000097" "mgeyhsqotv" "10000098" "xlowbdnjic" "10000099" "spjuratyeg"'
tested-groups:
- sorted-set
tested-commands:
- zrangebyscore
redis-topologies:
- oss-standalone
build-variants:
- gcc:8.5.0-amd64-debian-buster-default
clientconfig:
  run_image: redislabs/memtier_benchmark:edge
  tool: memtier_benchmark
  # The score window 0..1000000000 contains every seeded score, so each request
  # returns all 100 members together with their scores.
  arguments: --command="ZRANGEBYSCORE zset:100:long_score 0 1000000000 WITHSCORES" --hide-histogram --test-time 180
  resources:
    requests:
      cpus: '4'
      memory: 2g

priority: 7