Skip to content

Commit

Permalink
Refactor the script to make host and port configurable
Browse files: browse the repository at this point in the history
Signed-off-by: Olga Bulat <obulat@gmail.com>
  • Loading branch information
obulat committed Jul 18, 2024
1 parent 2428975 commit 350507c
Showing 1 changed file with 60 additions and 32 deletions.
92 changes: 60 additions & 32 deletions utilities/dead_links/dead_link_tally.py
Original file line number | Diff line number | Diff line change
@@ -10,19 +10,12 @@
from collections import defaultdict
from urllib.parse import urlparse

import click
from redis import Redis
from tqdm import tqdm


redis = Redis("localhost", port=6399, decode_responses=True)

cursor = 0

tallies = defaultdict(dict)
errors = dict()


def handle_matches(matches):
def handle_matches(redis, matches, tallies, errors):
values = redis.mget(matches)
for value, match in zip(values, matches):
try:
@@ -45,27 +38,62 @@ def handle_matches(matches):
errors[value] = e


total_to_process = redis.eval("return #redis.pcall('keys', 'valid:*')", 0)

with tqdm(total=total_to_process, miniters=10) as pbar:
cursor, matches = redis.scan(cursor=0, match="valid:*", count=250)
handle_matches(matches)
pbar.update(len(matches))
iter_count = 1

while cursor != 0:
cursor, matches = redis.scan(cursor=cursor, match="valid:*", count=250)
handle_matches(matches)
@click.command()
@click.option(
"--host",
help="Redis host to connect to",
type=str,
default="localhost",
)
@click.option(
"--port",
help="Port to connect to",
type=int,
default=None,
show_default=True,
)
def main(host: str, port: int | None):
port_str = f":{port}" if port is not None else ""
click.echo(f"Connecting to Redis cluster at {host}{port_str}")

redis_params = {"host": host, "decode_responses": True}
if port is not None:
redis_params["port"] = port

redis = Redis(**redis_params)
try:
redis.ping()
except Exception as e:
click.echo(f"Error connecting to Redis: {e}")
return

tallies = defaultdict(dict)
errors = dict()

total_to_process = redis.eval("return #redis.pcall('keys', 'valid:*')", 0)

with tqdm(total=total_to_process, miniters=10) as pbar:
cursor, matches = redis.scan(cursor=0, match="valid:*", count=250)
handle_matches(redis, matches, tallies, errors)
pbar.update(len(matches))
iter_count += 1
if iter_count % 10 == 0:
# only print each 10 iterations to ease I/O time spent
tqdm.write(
pprint.pformat(dict(cursor=cursor, **tallies), compact=True) + "\n"
)

print("\n\n\n\n============= FINAL RESULTS ============= \n\n")
pprint.pprint(tallies)

print("\n\n\n==================== ERRORS ===============\n\n")
pprint.pprint(errors)
iter_count = 1

while cursor != 0:
cursor, matches = redis.scan(cursor=cursor, match="valid:*", count=250)
handle_matches(redis, matches, tallies, errors)
pbar.update(len(matches))
iter_count += 1
if iter_count % 10 == 0:
# only print each 10 iterations to ease I/O time spent
tqdm.write(
pprint.pformat(dict(cursor=cursor, **tallies), compact=True) + "\n"
)
print("\n\n\n\n============= FINAL RESULTS ============= \n\n")
pprint.pprint(tallies)

print("\n\n\n==================== ERRORS ===============\n\n")
pprint.pprint(errors)


if __name__ == "__main__":
main()

0 comments on commit 350507c

Please sign in to comment.