Skip to content

Commit c098536

Browse files
committed
Change the normalized_email field to actually use normalize_email()
1 parent 70ea84e commit c098536

File tree

3 files changed

+103
-1
lines changed

3 files changed

+103
-1
lines changed

dbschema/migrations/00006.edgeql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Redefine the computed property `normalized_email` on
# default::ContributorLicenseAgreement so that its value is produced by
# default::normalize_email(.email).
CREATE MIGRATION m1fmqcwgzvkve5zaexz2qzd326ilc4xem2lrebzhsqhtrfv3iyhkza
2+
ONTO m1gibmer4jzdtynetjkyk5iptdavgrgxf5golmpxdm4zmnwvsrvypq
3+
{
4+
ALTER TYPE default::ContributorLicenseAgreement {
5+
ALTER PROPERTY normalized_email {
6+
USING (default::normalize_email(.email));
7+
};
8+
};
9+
};

dbschema/structure.esdl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ module default {
114114
};
115115
index on (.email);
116116

117-
property normalized_email := str_lower(.email);
117+
property normalized_email := normalize_email(.email);
118118
constraint exclusive on (.normalized_email);
119119
index on (.normalized_email);
120120

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
Deduplicate database entries whose emails normalize to the same address.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
import atexit
10+
import os
11+
import time
12+
from urllib.parse import urlparse
13+
14+
from dotenv import load_dotenv
15+
import edgedb
16+
from edgedb.pgproto import pgproto
17+
from rich.console import Console
18+
from rich.progress import Progress
19+
from rich.prompt import Prompt
20+
21+
22+
# Let python-dotenv populate the environment (from a .env file, if present)
# before any variable is read.
load_dotenv()

# A missing DATABASE_URL fails immediately with KeyError. Only the password
# component of the URL is extracted here.
DATABASE_URL = os.environ["DATABASE_URL"]
_database_url = urlparse(DATABASE_URL)
EDGEDB_PASSWORD = _database_url.password
26+
27+
28+
class RollBack(Exception):
    """Raised inside a transaction block to abandon it instead of committing."""
30+
31+
32+
# The builtin print is deliberately shadowed so that every message in this
# script goes through the rich console.
console = Console()
print = console.print


print("Connecting to EdgeDB", end="... ")
con = edgedb.create_client(
    host="localhost",
    user="edgedb",
    database="edgedb",
    password=EDGEDB_PASSWORD,
)
# create_client() does not open a connection by itself, so force one here;
# otherwise "connected." would be reported before the server was ever reached.
con.ensure_connected()
print("connected.")
atexit.register(con.close)

NormEmail = str
ID = pgproto.UUID
# Ids of every agreement encountered, grouped by normalized email.
seen: dict[NormEmail, set[ID]] = {}
# (id, normalized email) pairs of the entries scheduled for deletion.
to_delete: set[tuple[ID, NormEmail]] = set()
50+
51+
with Progress(console=console, transient=True) as progress:
    # Results are ordered by creation time, so the first agreement seen for a
    # normalized email is the one that is kept.
    result = con.query(
        """
        SELECT ContributorLicenseAgreement {
            id,
            email,
            norm_email := normalize_email(.email)
        }
        ORDER BY .creation_time;
        """,
    )
    task = progress.add_task("Processing", total=len(result))
    for cla in result:
        known_ids = seen.get(cla.norm_email)
        if known_ids is None:
            # First occurrence of this normalized email: remember it.
            seen[cla.norm_email] = {cla.id}
        else:
            # Every later occurrence is a duplicate to be removed.
            to_delete.add((cla.id, cla.norm_email))
            known_ids.add(cla.id)
        progress.advance(task)

print(f"Seen {len(seen)} normalized emails.")
print(f"Should delete {len(to_delete)} duplicates.")
71+
72+
try:
    for tx in con.transaction():
        with tx:
            with Progress(console=console, transient=True) as progress:
                task = progress.add_task("Deleting duplicates", total=len(to_delete))
                for duplicate_id, duplicate_email in sorted(to_delete):
                    print(duplicate_email)
                    tx.query(
                        """
                        DELETE ContributorLicenseAgreement
                        FILTER .id = <uuid>$id;
                        """,
                        id=duplicate_id,
                    )
                    progress.advance(task)
            # Ask only when something was actually deleted; anything other
            # than "y" (case-insensitive) counts as a refusal.
            confirmed = (
                not to_delete
                or Prompt.ask("Commit deletion? (y/N)").lower() == "y"
            )
            if not confirmed:
                print("Aborting.")
                raise RollBack
except RollBack:
    # Leaving the `with tx` block through an exception discards the
    # deletions; there is nothing further to do here.
    pass

0 commit comments

Comments
 (0)