Skip to content

Commit 549928f

Browse files
committed
Fix lru cache multiprocessing memory leak issue
1 parent fbb7b3d commit 549928f

File tree

2 files changed

+12
-7
lines changed

2 files changed

+12
-7
lines changed

src/deep_impact/evaluation/ranker.py

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from itertools import product
2+
from multiprocessing import Pool
23
from pathlib import Path
34
from typing import Union
45

@@ -9,6 +10,11 @@
910
from src.utils.datasets import QueryRelevanceDataset, Queries, RunFile
1011

1112

13+
def rank(args):
14+
index, qid, query_terms = args
15+
return qid, index.score(query_terms=query_terms)
16+
17+
1218
class Ranker:
1319
def __init__(
1420
self,
@@ -29,9 +35,11 @@ def __init__(
2935
self.model_cls = DeepPairwiseImpact if pairwise else DeepImpact
3036

3137
def run(self):
32-
for qid in tqdm(self.qrels.keys()):
33-
scores = self.rank(qid=qid)
34-
self.run_file.writelines(qid, scores)
38+
with Pool(self.num_workers) as p, tqdm(total=len(self.qrels)) as pbar:
39+
for qid, scores in p.imap_unordered(rank, [(self.index, qid, self.get_query_terms(qid)) for qid in
40+
self.qrels.keys()]):
41+
self.run_file.writelines(qid, scores)
42+
pbar.update(1)
3543

3644
def get_query_terms(self, qid):
3745
query_terms = self.model_cls.process_query(query=self.queries[qid])
@@ -42,6 +50,3 @@ def get_query_terms(self, qid):
4250
query_terms.add(f'{term1}|{term2}')
4351

4452
return query_terms
45-
46-
def rank(self, qid):
47-
return self.index.score(query_terms=self.get_query_terms(qid=qid))

src/deep_impact/inverted_index/inverted_index.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ def term_location(self, term):
3737
start, end = struct.unpack(LOC_BLOCK_FORMAT, bf.read(LOC_BLOCK_BYTES))
3838
return term_id, start, end
3939

40-
@lru_cache(maxsize=10000)
40+
# @lru_cache(maxsize=10000)
4141
def term_docs(self, term):
4242
term_id, start, end = self.term_location(term)
4343
if term_id is None:

0 commit comments

Comments
 (0)