Skip to content

Commit ca4bb17

Browse files
authored
Merge pull request #61 from OpenSemanticLab/store_entity-rework-add-selective-overwrite
store_entity rework to add selective overwrite
2 parents 1b0103d + de5df3d commit ca4bb17

File tree

8 files changed

+854
-117
lines changed

8 files changed

+854
-117
lines changed

src/osw/core.py

Lines changed: 421 additions & 64 deletions
Large diffs are not rendered by default.

src/osw/utils/strings.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,10 @@ def validate_group_keys(cls, group_keys, values) -> List[str]:
9898
pattern = values.get("pattern")
9999
group_count = count_match_groups(pattern)
100100
# count_match_groups() already excludes non-capturing groups "(?:"
101-
corrected_group_count = group_count - pattern.count("(?:")
102-
if len(group_keys) != corrected_group_count:
101+
if len(group_keys) != group_count:
103102
raise ValueError(
104103
f"The number of group keys ({len(group_keys)}) does not match "
105-
f"the number of match groups ({corrected_group_count})."
104+
f"the number of match groups ({group_count})."
106105
)
107106
return group_keys
108107

@@ -183,7 +182,8 @@ def count_match_groups(pattern: Union[str, re.Pattern]):
183182
group_count += 1
184183
unmatched_brackets -= 1
185184

186-
return group_count
185+
corrected_group_count = group_count - pattern.count("(?:")
186+
return corrected_group_count
187187

188188

189189
def match_regex_patterns(

src/osw/utils/wiki.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,3 +160,12 @@ def title_from_full_title(full_title: str) -> str:
160160
the title as a string
161161
"""
162162
return full_title.split(":")[-1]
163+
164+
165+
def is_empty(val):
    """Check whether the given value counts as empty.

    Parameters
    ----------
    val
        The value to inspect.

    Returns
    -------
    bool
        True if ``val`` is None or a list, str or dict of length zero.
        False for every other value, including falsy scalars such as ``0``
        or ``False`` and empty containers of other types (e.g. tuples).
    """
    if val is None:
        return True
    # Only these three container types are inspected; any other object is
    # regarded as a real value and therefore as not empty.
    if isinstance(val, (list, str, dict)):
        return len(val) == 0
    return False

src/osw/wtsite.py

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
from pydantic import FilePath
2020
from typing_extensions import deprecated
2121

22-
import osw.model.entity as model
2322
import osw.model.page_package as package
2423
import osw.utils.util as ut
2524
import osw.wiki_tools as wt
@@ -180,7 +179,7 @@ def from_credentials(
180179
site = wt.create_site_object(_domain, "", _credentials)
181180
return cls(WtSite.WtSiteLegacyConfig(site=site))
182181

183-
class GetPageParam(model.OswBaseModel):
182+
class GetPageParam(OswBaseModel):
184183
titles: Union[str, List[str]]
185184
"""title string or list of title strings of the pages to download"""
186185
parallel: Optional[bool] = None
@@ -202,7 +201,7 @@ def __init__(self, **data):
202201
if self.parallel is None:
203202
self.parallel = False
204203

205-
class GetPageResult(model.OswBaseModel):
204+
class GetPageResult(OswBaseModel):
206205
pages: List["WtPage"]
207206
"""List of pages that have been downloaded"""
208207
errors: List[Exception]
@@ -275,7 +274,7 @@ def get_WtPage(self, title: str = None):
275274
result = self.get_page(WtSite.GetPageParam(titles=title))
276275
return result.pages[0]
277276

278-
class GetPageContentResult(model.OswBaseModel):
277+
class GetPageContentResult(OswBaseModel):
279278
contents: dict
280279
"""The content of the pages. Keys are page titles, values are
281280
content dictionaries"""
@@ -362,7 +361,7 @@ def semantic_search(self, query: Union[str, wt.SearchParam]):
362361
"""
363362
return wt.semantic_search(self._site, query)
364363

365-
class ModifySearchResultsParam(model.OswBaseModel):
364+
class ModifySearchResultsParam(OswBaseModel):
366365
"""Todo: should become param of modify_search_results"""
367366

368367
mode: str
@@ -429,7 +428,7 @@ def modify_search_results(
429428
if not dryrun:
430429
wtpage.edit(comment)
431430

432-
class UploadPageParam(model.OswBaseModel):
431+
class UploadPageParam(OswBaseModel):
433432
"""Parameter object for upload_page method."""
434433

435434
pages: Union["WtPage", List["WtPage"]]
@@ -490,7 +489,7 @@ def upload_page_(page, index: int = None):
490489
else:
491490
_ = [upload_page_(p, i) for i, p in enumerate(param.pages)]
492491

493-
class CopyPagesParam(model.OswBaseModel):
492+
class CopyPagesParam(OswBaseModel):
494493
"""Configuration to copy several pages"""
495494

496495
source_site: "WtSite"
@@ -545,7 +544,7 @@ def copy_single_page(content_dict: dict):
545544
else:
546545
return [copy_single_page(content) for content in content_list]
547546

548-
class CreatePagePackageParam(model.OswBaseModel):
547+
class CreatePagePackageParam(OswBaseModel):
549548
"""Parameter object for create_page_package method."""
550549

551550
config: package.PagePackageConfig
@@ -630,7 +629,7 @@ def create_page_package(self, param: CreatePagePackageParam):
630629
with open(file_name, "w", encoding="utf-8") as f:
631630
f.write(content)
632631

633-
class ReadPagePackageParam(model.OswBaseModel):
632+
class ReadPagePackageParam(OswBaseModel):
634633
"""Parameter type of read_page_package."""
635634

636635
storage_path: Union[str, Path]
@@ -643,7 +642,7 @@ class ReadPagePackageParam(model.OswBaseModel):
643642
debug: Optional[bool] = False
644643
"""If True, debug information is printed to the console."""
645644

646-
class ReadPagePackageResult(model.OswBaseModel):
645+
class ReadPagePackageResult(OswBaseModel):
647646
"""Return type of read_page_package."""
648647

649648
pages: List["WtPage"]
@@ -786,7 +785,7 @@ def get_slot_content(
786785
pages.append(page_obj)
787786
return WtSite.ReadPagePackageResult(pages=pages)
788787

789-
class UploadPagePackageParam(model.OswBaseModel):
788+
class UploadPagePackageParam(OswBaseModel):
790789
"""Parameter class for upload_page_package method."""
791790

792791
storage_path: Optional[Union[str, Path]] = None
@@ -1256,6 +1255,7 @@ def _edit(self, comment: str = None, mode="action-multislot"):
12561255
content = json.dumps(content, ensure_ascii=False)
12571256
params["slot_" + slot_key] = content
12581257
if changed:
1258+
self.changed = True
12591259
self.wtSite._site.api(
12601260
"editslots",
12611261
token=self.wtSite._site.get_token("csrf"),
@@ -1266,8 +1266,10 @@ def _edit(self, comment: str = None, mode="action-multislot"):
12661266
self.wtSite._clear_cookies()
12671267

12681268
else:
1269+
changed = False
12691270
for slot_key in self._slots:
12701271
if self._slots_changed[slot_key]:
1272+
changed = True
12711273
content = self._slots[slot_key]
12721274
if self._content_model[slot_key] == "json":
12731275
content = json.dumps(content, ensure_ascii=False)
@@ -1280,6 +1282,8 @@ def _edit(self, comment: str = None, mode="action-multislot"):
12801282
summary=comment,
12811283
)
12821284
self._slots_changed[slot_key] = False
1285+
if changed:
1286+
self.changed = True
12831287

12841288
def delete(self, comment: str = None):
12851289
"""Deletes the page from the site
@@ -1321,7 +1325,7 @@ def get_last_changed_time(self):
13211325
self._current_revision["timestamp"].replace("Z", "+00:00")
13221326
)
13231327

1324-
class CopyPageConfig(model.OswBaseModel):
1328+
class CopyPageConfig(OswBaseModel):
13251329
"""Configuration to copy a page"""
13261330

13271331
source_site: WtSite
@@ -1337,7 +1341,7 @@ class CopyPageConfig(model.OswBaseModel):
13371341
class Config:
13381342
arbitrary_types_allowed = True
13391343

1340-
class PageCopyResult(model.OswBaseModel):
1344+
class PageCopyResult(OswBaseModel):
13411345
"""Result of copying a page"""
13421346

13431347
page: "WtPage"
@@ -1389,7 +1393,7 @@ def copy(self, config: CopyPageConfig) -> PageCopyResult:
13891393
print(s2p)
13901394
return WtPage.PageCopyResult(page=self, target_altered=True)
13911395

1392-
class PageDumpConfig(model.OswBaseModel):
1396+
class PageDumpConfig(OswBaseModel):
13931397
"""Configuration to dump wiki pages to the file system"""
13941398

13951399
target_dir: Union[str, Path]
@@ -1549,15 +1553,15 @@ def purge(self):
15491553
"""
15501554
self._page.purge()
15511555

1552-
class ExportConfig(model.OswBaseModel):
1556+
class ExportConfig(OswBaseModel):
15531557
"""Configuration to export a page to XML"""
15541558

15551559
full_history: Optional[bool] = True
15561560
"""if true, export the full history of the page, else only the current revision"""
15571561
include_templates: Optional[bool] = False
15581562
"""if true, export the templates used in the page"""
15591563

1560-
class ExportResult(model.OswBaseModel):
1564+
class ExportResult(OswBaseModel):
15611565
"""Return type of export_xml"""
15621566

15631567
xml: str
@@ -1604,7 +1608,7 @@ def export_xml(
16041608
else:
16051609
return WtPage.ExportResult(success=True, xml=response.text)
16061610

1607-
class ImportConfig(model.OswBaseModel):
1611+
class ImportConfig(OswBaseModel):
16081612
"""Configuration to import a page from XML.
16091613
see also https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps"""
16101614

@@ -1629,7 +1633,7 @@ class ImportConfig(model.OswBaseModel):
16291633
username_mapping: Optional[Dict[str, str]] = {}
16301634
"""mapping of usernames in the XML to usernames in the target instance"""
16311635

1632-
class ImportResult(model.OswBaseModel):
1636+
class ImportResult(OswBaseModel):
16331637
"""Return type of import_xml"""
16341638

16351639
success: bool

tests/integration/store_and_load_test.py

Lines changed: 133 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,154 @@
1-
import mwclient
1+
import sys
2+
from pathlib import Path
23

34
import osw.model.entity as model
4-
from osw.core import OSW
5+
from osw.auth import CredentialManager
6+
from osw.core import OSW, AddOverwriteClassOptions, OverwriteOptions
7+
from osw.utils.wiki import get_full_title
58
from osw.wiki_tools import SearchParam
69
from osw.wtsite import WtSite
710

811
# run with: tox -e test -- --wiki_domain domain --wiki_username user --wiki_password pass
912

1013

1114
def test_store_and_load(wiki_domain, wiki_username, wiki_password):
12-
site = mwclient.Site(host=wiki_domain)
13-
site.login(username=wiki_username, password=wiki_password)
14-
wtsite = WtSite(WtSite.WtSiteLegacyConfig(site=site))
15+
cm = CredentialManager()
16+
cm.add_credential(
17+
CredentialManager.UserPwdCredential(
18+
iri=wiki_domain, username=wiki_username, password=wiki_password
19+
)
20+
)
21+
wtsite = WtSite(WtSite.WtSiteConfig(iri=wiki_domain, cred_mngr=cm))
1522
osw = OSW(site=wtsite)
16-
23+
# Check 1: Store an entity and download it again, delete afterward
24+
# Create an item with a label
1725
my_entity = model.Item(label=[model.Label(text="MyItem")])
18-
26+
# Store the item in the OSW
1927
osw.store_entity(my_entity)
20-
28+
# Load the item from the OSW
2129
my_entity2 = osw.load_entity("Item:" + OSW.get_osw_id(my_entity.uuid))
22-
30+
# Check the stored item
2331
assert my_entity.label[0].text == my_entity2.label[0].text
24-
32+
# Delete the item
2533
osw.delete_entity(my_entity)
2634

35+
# Check 2: Store a more complex entity, create a local duplicate, with changed
36+
# properties, test the 'overwrite' param options, delete afterward
37+
# Make non-package scripts available for import
38+
cwd = Path(__file__).parent.absolute()
39+
tests_dir = cwd.parents[1] / "tests"
40+
sys.path.append(str(tests_dir))
41+
# Get required functions
42+
from test_osl import (
43+
check_false,
44+
check_keep_existing,
45+
check_only_empty,
46+
check_replace_remote,
47+
check_true,
48+
)
49+
50+
checks = [
51+
{ # Overwrite properties
52+
"overwrite": OverwriteOptions.true,
53+
"assert": check_true,
54+
},
55+
{ # Do not overwrite properties
56+
"overwrite": OverwriteOptions.false,
57+
"assert": check_false,
58+
},
59+
{ # Overwrite empty properties only
60+
"overwrite": OverwriteOptions.only_empty,
61+
"assert": check_only_empty,
62+
},
63+
{ # Replace the remote entity entirely
64+
"overwrite": AddOverwriteClassOptions.replace_remote,
65+
"assert": check_replace_remote,
66+
},
67+
{ # Keep the existing entity as is
68+
"overwrite": AddOverwriteClassOptions.keep_existing,
69+
"assert": check_keep_existing,
70+
},
71+
]
72+
for check in checks:
73+
# Create a new item with some properties
74+
original_item = model.Item(
75+
label=[model.Label(text="My Item")],
76+
name="MyItem",
77+
iri="", # Empty string property
78+
description=[], # Empty list property
79+
query_label=None, # Equal to a non-existing property
80+
image="File:OSWacacwdcawd.png", # Property not in altered
81+
)
82+
print("Storing original entity...")
83+
osw.store_entity(original_item)
84+
# Alter some of the property values
85+
altered_props = {
86+
"label": [model.Label(text="My Item Duplicate")],
87+
"name": "MyItemDuplicate",
88+
"iri": "http://example.com/MyItemDuplicate",
89+
"description": [model.Label(text="This is a duplicate")],
90+
"query_label": "My Item Duplicate",
91+
"attachments": ["File:OSWacacwdcawd.pdf"], # Property not in original
92+
}
93+
# Create a new item with the altered properties
94+
original_props = {
95+
key: value
96+
for key, value in original_item.dict().items()
97+
if key in ["uuid", "type"]
98+
}
99+
altered_item = model.Item(**{**original_props, **altered_props})
100+
# Update the item in the OSW
101+
print("Storing altered entity...")
102+
osw.store_entity(
103+
param=OSW.StoreEntityParam(
104+
entities=altered_item, overwrite=check["overwrite"]
105+
)
106+
)
107+
# Load the item from the OSW
108+
stored_item = osw.load_entity(
109+
entity_title=OSW.LoadEntityParam(
110+
titles="Item:" + OSW.get_osw_id(original_item.uuid), disable_cache=True
111+
)
112+
).entities[0]
113+
# Check the stored item
114+
check["assert"](original_item, altered_item, stored_item)
115+
# Delete the item
116+
osw.delete_entity(original_item)
117+
118+
119+
def test_query_instances(wiki_domain, wiki_username, wiki_password):
120+
"""Store an entity, query instances of the category of the entity, make sure the
121+
new entity is contained in the list of returned instances, delete the entity."""
122+
cm = CredentialManager()
123+
cm.add_credential(
124+
CredentialManager.UserPwdCredential(
125+
iri=wiki_domain, username=wiki_username, password=wiki_password
126+
)
127+
)
128+
wtsite = WtSite(WtSite.WtSiteConfig(iri=wiki_domain, cred_mngr=cm))
129+
osw = OSW(site=wtsite)
130+
# Create an item with a label
131+
my_item = model.Item(label=[model.Label(text="MyItem")])
132+
fpt = get_full_title(my_item)
133+
# Store the item in the OSW
134+
osw.store_entity(my_item)
135+
# Query instances of the category of the entity
136+
instances = osw.query_instances(
137+
category=OSW.QueryInstancesParam(categories="Category:Item", limit=10000)
138+
)
139+
assert fpt in instances
140+
# Delete the item
141+
osw.delete_entity(my_item)
142+
27143

28144
def test_statement_creation(wiki_domain, wiki_username, wiki_password):
29-
site = mwclient.Site(host=wiki_domain)
30-
site.login(username=wiki_username, password=wiki_password)
31-
wtsite = WtSite(WtSite.WtSiteLegacyConfig(site=site))
145+
cm = CredentialManager()
146+
cm.add_credential(
147+
CredentialManager.UserPwdCredential(
148+
iri=wiki_domain, username=wiki_username, password=wiki_password
149+
)
150+
)
151+
wtsite = WtSite(WtSite.WtSiteConfig(iri=wiki_domain, cred_mngr=cm))
32152
osw = OSW(site=wtsite)
33153

34154
my_entity = model.Item(

0 commit comments

Comments
 (0)