Skip to content

Commit b712168

Browse files
committed
Parallelization for osw.data.import_utility.get_entities_from_osw added
1 parent 7ac25e1 commit b712168

File tree

1 file changed

+14
-5
lines changed

1 file changed

+14
-5
lines changed

src/osw/data/import_utility.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from osw.model import entity as model
2020
from osw.utils.regex import MatchResult, RegExPatternExtended
2121
from osw.utils.regex_pattern import REGEX_PATTERN_LIB
22-
from osw.wtsite import WtSite
22+
from osw.wtsite import WtPage, WtSite
2323

2424
# Constants
2525
ENABLE_SORTING = True
@@ -761,6 +761,7 @@ def get_entities_from_osw(
761761
cred_filepath,
762762
domain,
763763
limit: int = None,
764+
parallel: bool = True,
764765
osw_obj: OSW = None,
765766
debug: bool = False,
766767
) -> list:
@@ -780,6 +781,8 @@ def get_entities_from_osw(
780781
Domain of the OSW instance.
781782
limit:
782783
Maximum number of entities returned by this query
784+
parallel:
785+
If True, the search for entities and the retrieval of their pages are done in parallel.
783786
osw_obj:
784787
OSW instance to use. If None, a new instance is created.
785788
debug:
@@ -817,25 +820,31 @@ def test_if_empty_list_or_none(obj) -> bool:
817820
wt.SearchParam(
818821
query=f"[[HasType::Category:OSW{str(category_uuid).replace('-', '')}]]",
819822
debug=debug,
823+
parallel=parallel,
820824
)
821825
if limit is None
822826
else wt.SearchParam(
823827
query=f"[[HasType::Category:OSW{str(category_uuid).replace('-', '')}]]",
824828
debug=debug,
825829
limit=limit,
830+
parallel=parallel,
826831
)
827832
)
828833
)
829-
for entity in entities:
830-
# entity = full page name
831-
page = wtsite_obj.get_page(WtSite.GetPageParam(titles=[entity])).pages[0]
834+
835+
pages: List[WtPage] = wtsite_obj.get_page(
836+
WtSite.GetPageParam(titles=entities, parallel=parallel)
837+
).pages
838+
839+
for page in pages:
832840
if page.exists:
833841
jsondata = page.get_slot_content("jsondata")
834-
jsondata["full_page_title"] = entity
842+
jsondata["full_page_title"] = page.title
835843
kwargs = {
836844
k: v for k, v in jsondata.items() if not test_if_empty_list_or_none(v)
837845
}
838846
entities_from_osw.append(model_to_cast_to(**kwargs))
847+
839848
return entities_from_osw
840849

841850

0 commit comments

Comments
 (0)