Skip to content

Commit

Permalink
Accelerate titles' embeddings. (#4492)
Browse files Browse the repository at this point in the history
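### What problem does this PR solve?

Speeds up title embedding in `embedding()` (`rag/svr/task_executor.py`): instead of encoding every title in batches, only the first title is encoded and the resulting vector is repeated once per title. This presumes all entries in `tts` are the same title (to be confirmed against the caller). The commit also lowers the layout score threshold in `deepdoc/vision/layout_recognizer.py` from 0.8 to 0.4.
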
### Type of change

- [x] Performance Improvement
  • Loading branch information
KevinHuSh authored Jan 15, 2025
1 parent b4614e9 commit c852a6d
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 11 deletions.
2 changes: 1 addition & 1 deletion deepdoc/vision/layout_recognizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def __is_garbage(b):
"x0": b["bbox"][0] / scale_factor, "x1": b["bbox"][2] / scale_factor,
"top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor,
"page_number": pn,
} for b in lts if float(b["score"]) >= 0.8 or b["type"] not in self.garbage_layouts]
} for b in lts if float(b["score"]) >= 0.4 or b["type"] not in self.garbage_layouts]
lts = self.sort_Y_firstly(lts, np.mean(
[lt["bottom"] - lt["top"] for lt in lts]) / 2)
lts = self.layouts_cleanup(bxs, lts)
Expand Down
13 changes: 3 additions & 10 deletions rag/svr/task_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,16 +354,9 @@ def embedding(docs, mdl, parser_config=None, callback=None):

tk_count = 0
if len(tts) == len(cnts):
tts_ = np.array([])
for i in range(0, len(tts), batch_size):
vts, c = mdl.encode(tts[i: i + batch_size])
if len(tts_) == 0:
tts_ = vts
else:
tts_ = np.concatenate((tts_, vts), axis=0)
tk_count += c
callback(prog=0.6 + 0.1 * (i + 1) / len(tts), msg="")
tts = tts_
vts, c = mdl.encode(tts[0: 1])
tts = np.concatenate([vts for _ in range(len(tts))], axis=0)
tk_count += c

cnts_ = np.array([])
for i in range(0, len(cnts), batch_size):
Expand Down

0 comments on commit c852a6d

Please sign in to comment.