Skip to content

Commit 52fcbcc

Browse files
authored
Merge pull request #25 from TogetherCrew/fix/mediawiki-activities-wrong-arg
fix: Added incomplete file removal!
2 parents 68050a9 + af15793 commit 52fcbcc

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

hivemind_etl/mediawiki/etl.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,14 @@ def extract(self, api_url: str, dump_dir: str | None = None) -> None:
34 34
else:
35 35
self.dump_dir = dump_dir
36 36

37-
self.wikiteam_crawler.crawl(api_url, dump_dir)
37+
try:
38+
self.wikiteam_crawler.crawl(api_url, dump_dir)
39+
except Exception as e:
40+
logging.error(f"Error crawling {api_url}: {e}")
41+
logging.warning("Removing incomplete dumped data if available!")
42+
if os.path.exists(dump_dir):
43+
shutil.rmtree(dump_dir)
44+
raise e
38 45

39 46
def transform(self) -> list[Document]:
40 47
pages = parse_mediawiki_xml(file_dir=self.dump_dir)

0 commit comments

Comments
 (0)