Skip to content

Commit aaee49a

Browse files
feat: enhance QuestionsDataSource with metadata management and synchronization for exam partitions
1 parent e7665ce commit aaee49a

File tree

2 files changed

+121
-1
lines changed

2 files changed

+121
-1
lines changed

lib/graphql/questionsDataSource.tsx

Lines changed: 103 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { Container } from "@azure/cosmos";
2-
import { fetchQuestions } from "./repoQuestions";
2+
import { fetchQuestions, fetchQuestionsAndChecksum } from "./repoQuestions";
33
import { getQuestionsContainer } from "./cosmos-client";
44

55
export const QuestionsDataSource = (container: Container) => {
@@ -74,13 +74,111 @@ const extractExamId = (link: string): string => {
7474
return segments[segments.length - 3].replace(/-/g, "_").toLowerCase();
7575
};
7676

77+
// Sync-metadata helpers: one marker document per exam partition records the
// checksum of the last imported source so stale partitions can be detected.

/** Value of the `type` field that marks a document as sync metadata. */
const META_TYPE = "meta";

/** Minimum time between two upstream checks for the same exam (10 minutes). */
const SYNC_COOLDOWN_MS = 10 * 60 * 1000;

/** Builds the document id of the metadata marker for the given exam. */
const metaIdForExam = (examId: string): string => "_meta_" + examId;
81+
82+
/**
 * Reads the sync-metadata document of an exam partition.
 *
 * The document is looked up by the id from `metaIdForExam(examId)`, using
 * `examId` as the partition key value.
 *
 * @param container - Cosmos container to read from.
 * @param examId - Exam identifier (also the partition key value).
 * @returns The stored metadata document, or `undefined` when none exists.
 * @throws Re-throws any read failure that is not a "not found" (404), so a
 *   transient outage is not mistaken for a partition that was never seeded.
 */
const readMeta = async (container: Container, examId: string) => {
  try {
    const { resource } = await container
      .item(metaIdForExam(examId), examId)
      .read<any>();
    return resource as any | undefined;
  } catch (err: unknown) {
    // Only a missing document means "no metadata yet". Anything else
    // (throttling, network, auth) must surface: callers treat `undefined`
    // as "never synced", which leads to a purge and reseed.
    const failure =
      typeof err === "object" && err !== null
        ? (err as { code?: number | string; statusCode?: number })
        : undefined;
    if (failure && (failure.code === 404 || failure.statusCode === 404)) {
      return undefined;
    }
    throw err;
  }
};
92+
93+
/**
 * Creates or replaces the sync-metadata document of an exam partition.
 *
 * The document carries the given checksum and is stamped with the current
 * time (ISO-8601) in `updatedAt`.
 *
 * @param container - Cosmos container to write to.
 * @param examId - Exam identifier stored on the document.
 * @param checksum - Checksum of the source content being recorded.
 */
const writeMeta = async (
  container: Container,
  examId: string,
  checksum: string,
) => {
  const stampedAt = new Date().toISOString();
  await container.items.upsert({
    id: metaIdForExam(examId),
    examId,
    type: META_TYPE,
    checksum,
    updatedAt: stampedAt,
  });
};
107+
108+
/**
 * Deletes every non-metadata document that belongs to the given exam.
 *
 * Deletions run one at a time. A failed deletion is logged as a warning and
 * the remaining documents are still processed.
 *
 * @param container - Cosmos container to purge from.
 * @param examId - Exam whose documents are removed.
 */
const purgeExamPartition = async (container: Container, examId: string) => {
  // Fetch only the ids; the metadata marker is excluded by its `type`.
  const { resources } = await container.items
    .query({
      query:
        "SELECT c.id FROM c WHERE c.examId = @examId AND (NOT IS_DEFINED(c.type) OR c.type != @metaType)",
      parameters: [
        { name: "@examId", value: examId },
        { name: "@metaType", value: META_TYPE },
      ],
    })
    .fetchAll();

  const staleIds = (resources as Array<{ id: string }>).map((doc) => doc.id);
  for (const staleId of staleIds) {
    try {
      await container.item(staleId, examId).delete();
    } catch (deleteError) {
      console.warn(
        `Failed to delete item ${staleId} in exam ${examId}:`,
        deleteError,
      );
    }
  }
};
127+
128+
/**
 * Upserts all questions of an exam, then records the checksum they came from.
 *
 * Questions are written sequentially, each tagged with `examId`. The
 * metadata document is written last, so it is only updated if every
 * question upsert succeeded.
 *
 * @param container - Cosmos container to write to.
 * @param examId - Exam identifier stamped onto every question.
 * @param questions - Question documents to store.
 * @param checksum - Checksum of the source the questions were parsed from.
 */
const seedExamPartition = async (
  container: Container,
  examId: string,
  questions: any[],
  checksum: string,
) => {
  for (let index = 0; index < questions.length; index += 1) {
    const tagged = Object.assign({}, questions[index], { examId });
    await container.items.upsert(tagged);
  }
  await writeMeta(container, examId, checksum);
};
140+
141+
/**
 * Brings an exam partition in line with its upstream source, best effort.
 *
 * Steps:
 *  1. Read the partition's metadata; if it was stamped less than
 *     `SYNC_COOLDOWN_MS` ago, do nothing.
 *  2. Fetch the upstream questions and their checksum; if that fails, do
 *     nothing.
 *  3. If there is no metadata or the checksum differs, purge the partition
 *     and reseed it (which also rewrites the metadata).
 *  4. Otherwise re-stamp the metadata so the cooldown window restarts.
 *
 * Every failure is logged as a warning and swallowed so that request
 * handling can continue with whatever data is currently stored.
 *
 * @param container - Cosmos container holding the exam's documents.
 * @param examId - Exam identifier (partition key value).
 * @param link - URL of the upstream source for this exam.
 */
const ensureExamSynced = async (
  container: Container,
  examId: string,
  link: string,
) => {
  try {
    const meta = await readMeta(container, examId);

    // Cooldown: avoid fetching upstream too frequently.
    if (meta && meta.updatedAt) {
      const last = new Date(meta.updatedAt).getTime();
      if (!Number.isNaN(last) && Date.now() - last < SYNC_COOLDOWN_MS) {
        return;
      }
    }

    const result = await fetchQuestionsAndChecksum(link);
    if (!result) return;

    const { questions, checksum } = result;
    if (!meta || meta.checksum !== checksum) {
      // NOTE(review): purge followed by seed is not atomic; readers may
      // briefly observe an empty or partially filled partition.
      await purgeExamPartition(container, examId);
      await seedExamPartition(container, examId, questions, checksum);
    } else {
      // Content unchanged: refresh the stamp anyway. Without this the
      // cooldown would stay expired and every later request would hit
      // upstream again. This also covers meta docs with a missing or
      // unparsable `updatedAt`.
      await writeMeta(container, examId, checksum);
    }
  } catch (err) {
    console.warn("ensureExamSynced failed:", err);
  }
};
171+
77172
export const CombinedQuestionsDataSource = () => {
78173
return {
79174
async getQuestion(id: string, link: string) {
80175
try {
81176
const examId = extractExamId(link);
82177
const questionsContainer = await getQuestionsContainer();
83178

179+
// Ensure the partition is up to date with source content
180+
await ensureExamSynced(questionsContainer, examId, link);
181+
84182
// Try Cosmos DB first (most efficient)
85183
const querySpec = {
86184
query: "SELECT * FROM c WHERE c.id = @id AND c.examId = @examId",
@@ -128,6 +226,8 @@ export const CombinedQuestionsDataSource = () => {
128226
const examId = extractExamId(link);
129227
const questionsContainer = await getQuestionsContainer();
130228

229+
await ensureExamSynced(questionsContainer, examId, link);
230+
131231
// Try Cosmos DB first
132232
const querySpec = {
133233
query: "SELECT VALUE COUNT(c.id) FROM c WHERE c.examId = @examId",
@@ -170,6 +270,8 @@ export const CombinedQuestionsDataSource = () => {
170270
const examId = extractExamId(link);
171271
const questionsContainer = await getQuestionsContainer();
172272

273+
await ensureExamSynced(questionsContainer, examId, link);
274+
173275
// Try Cosmos DB first
174276
const querySpec = {
175277
query: "SELECT * FROM c WHERE c.examId = @examId",

lib/graphql/repoQuestions.tsx

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { createHash } from "crypto";
12
const scrapeQuestions = (markdownText: string) => {
23
const regex =
34
/### (.*?)\s*\r?\n\r?\n((?:\!\[.*?\]\(.*?\)\s*\r?\n\r?\n)*?)((?:- \[(?:x| )\] .*?\r?\n)+)/gs;
@@ -57,3 +58,20 @@ export const fetchQuestions = async (link: string) => {
5758
console.error(err.message);
5859
}
5960
};
61+
62+
/**
 * Downloads the markdown behind `link` and returns the questions scraped
 * from it together with a SHA-256 hex digest of the raw markdown text.
 *
 * @param link - URL of the markdown source.
 * @returns The scraped questions and the checksum, or `undefined` when the
 *   request fails, the response status is not OK, or processing throws; the
 *   error message is written to `console.error` in those cases.
 */
export const fetchQuestionsAndChecksum = async (
  link: string,
): Promise<{ questions: any[]; checksum: string } | undefined> => {
  try {
    const response = await fetch(link);
    if (!response.ok) {
      throw new Error(response.statusText);
    }

    const body = await response.text();
    const questions = scrapeQuestions(body);

    // Hash the raw text so any upstream edit changes the checksum.
    const hasher = createHash("sha256");
    hasher.update(body);
    const checksum = hasher.digest("hex");

    return { questions, checksum };
  } catch (err: any) {
    console.error(err.message);
    return undefined;
  }
};

0 commit comments

Comments
 (0)