Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
d122366
draft of store_entity rewrite + adaptation of
LukasGold Jul 8, 2024
0857a59
added test for osw.core.OSW.query_instances
LukasGold Jul 8, 2024
7f7c760
Added debugging option to suppress printouts
LukasGold Jul 8, 2024
eb99a07
small fix to avoid limited query
LukasGold Jul 8, 2024
c3e1ef2
testing to fix store_entity
LukasGold Jul 8, 2024
dfb10f1
Fix: Error proofing osw.core.OSW.delete_entity
LukasGold Jul 9, 2024
8c43b16
Enh: Moving to WtSiteConfig from WtSiteLegacyConfig
LukasGold Jul 9, 2024
df2a469
Enh: introducing an enum for overwrite options to avoid otherwise lik…
LukasGold Jul 9, 2024
6f45601
refactoring to remove import osw.model.entity as model
LukasGold Jul 12, 2024
e1ca4b8
making use of WtPage.changed in the function edit() again
LukasGold Jul 12, 2024
b073c50
introducing osw.core.OSW.apply_overwrite_policy()
LukasGold Jul 12, 2024
9ede423
fixing the type error occurring in the file controller test
LukasGold Jul 12, 2024
facf714
making apply_overwrite_policy and accompanying params class private
LukasGold Jul 12, 2024
8ccd82c
moved test_regex_pattern to utils_test and added more unit tests
LukasGold Jul 15, 2024
b87a184
changes to osw.core.OSW_apply_overwrite_policy
LukasGold Jul 15, 2024
22bac86
improved test coverage
LukasGold Jul 15, 2024
c8cedb4
osw.utils.strings.count_match_groups now includes a correction of gro…
LukasGold Jul 15, 2024
cc43216
allowing the upload of entities without jsondata and without an OSW-I…
LukasGold Jul 15, 2024
5755e94
cosmetics
LukasGold Jul 15, 2024
de5df3d
fix: main statement in test
LukasGold Jul 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
485 changes: 421 additions & 64 deletions src/osw/core.py

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions src/osw/utils/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,10 @@ def validate_group_keys(cls, group_keys, values) -> List[str]:
pattern = values.get("pattern")
group_count = count_match_groups(pattern)
# Subtract non-capturing group count
corrected_group_count = group_count - pattern.count("(?:")
if len(group_keys) != corrected_group_count:
if len(group_keys) != group_count:
raise ValueError(
f"The number of group keys ({len(group_keys)}) does not match "
f"the number of match groups ({corrected_group_count})."
f"the number of match groups ({group_count})."
)
return group_keys

Expand Down Expand Up @@ -183,7 +182,8 @@ def count_match_groups(pattern: Union[str, re.Pattern]):
group_count += 1
unmatched_brackets -= 1

return group_count
corrected_group_count = group_count - pattern.count("(?:")
return corrected_group_count


def match_regex_patterns(
Expand Down
9 changes: 9 additions & 0 deletions src/osw/utils/wiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,12 @@ def title_from_full_title(full_title: str) -> str:
the title as a string
"""
return full_title.split(":")[-1]


def is_empty(val) -> bool:
    """Check whether the given value counts as empty.

    A value is considered empty if it is ``None`` or if it is a ``list``,
    ``str`` or ``dict`` of length zero. Every other value is reported as not
    empty - this includes ``0``, ``False`` and empty containers of other
    types such as ``tuple`` or ``set``.

    Parameters
    ----------
    val
        The value to inspect.

    Returns
    -------
        True if the value is empty in the sense described above, else False
    """
    if val is None:
        return True
    # Only these three types are checked for their length; one isinstance()
    # call with a tuple of types replaces the chained isinstance() checks.
    if isinstance(val, (list, str, dict)):
        return len(val) == 0
    return False
40 changes: 22 additions & 18 deletions src/osw/wtsite.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from pydantic import FilePath
from typing_extensions import deprecated

import osw.model.entity as model
import osw.model.page_package as package
import osw.utils.util as ut
import osw.wiki_tools as wt
Expand Down Expand Up @@ -180,7 +179,7 @@ def from_credentials(
site = wt.create_site_object(_domain, "", _credentials)
return cls(WtSite.WtSiteLegacyConfig(site=site))

class GetPageParam(model.OswBaseModel):
class GetPageParam(OswBaseModel):
titles: Union[str, List[str]]
"""title string or list of title strings of the pages to download"""
parallel: Optional[bool] = None
Expand All @@ -202,7 +201,7 @@ def __init__(self, **data):
if self.parallel is None:
self.parallel = False

class GetPageResult(model.OswBaseModel):
class GetPageResult(OswBaseModel):
pages: List["WtPage"]
"""List of pages that have been downloaded"""
errors: List[Exception]
Expand Down Expand Up @@ -275,7 +274,7 @@ def get_WtPage(self, title: str = None):
result = self.get_page(WtSite.GetPageParam(titles=title))
return result.pages[0]

class GetPageContentResult(model.OswBaseModel):
class GetPageContentResult(OswBaseModel):
contents: dict
"""The content of the pages. Keys are page titles, values are
content dictionaries"""
Expand Down Expand Up @@ -362,7 +361,7 @@ def semantic_search(self, query: Union[str, wt.SearchParam]):
"""
return wt.semantic_search(self._site, query)

class ModifySearchResultsParam(model.OswBaseModel):
class ModifySearchResultsParam(OswBaseModel):
"""Todo: should become param of modify_search_results"""

mode: str
Expand Down Expand Up @@ -429,7 +428,7 @@ def modify_search_results(
if not dryrun:
wtpage.edit(comment)

class UploadPageParam(model.OswBaseModel):
class UploadPageParam(OswBaseModel):
"""Parameter object for upload_page method."""

pages: Union["WtPage", List["WtPage"]]
Expand Down Expand Up @@ -490,7 +489,7 @@ def upload_page_(page, index: int = None):
else:
_ = [upload_page_(p, i) for i, p in enumerate(param.pages)]

class CopyPagesParam(model.OswBaseModel):
class CopyPagesParam(OswBaseModel):
"""Configuration to copy several page"""

source_site: "WtSite"
Expand Down Expand Up @@ -545,7 +544,7 @@ def copy_single_page(content_dict: dict):
else:
return [copy_single_page(content) for content in content_list]

class CreatePagePackageParam(model.OswBaseModel):
class CreatePagePackageParam(OswBaseModel):
"""Parameter object for create_page_package method."""

config: package.PagePackageConfig
Expand Down Expand Up @@ -630,7 +629,7 @@ def create_page_package(self, param: CreatePagePackageParam):
with open(file_name, "w", encoding="utf-8") as f:
f.write(content)

class ReadPagePackageParam(model.OswBaseModel):
class ReadPagePackageParam(OswBaseModel):
"""Parameter type of read_page_package."""

storage_path: Union[str, Path]
Expand All @@ -643,7 +642,7 @@ class ReadPagePackageParam(model.OswBaseModel):
debug: Optional[bool] = False
"""If True, debug information is printed to the console."""

class ReadPagePackageResult(model.OswBaseModel):
class ReadPagePackageResult(OswBaseModel):
"""Return type of read_page_package."""

pages: List["WtPage"]
Expand Down Expand Up @@ -786,7 +785,7 @@ def get_slot_content(
pages.append(page_obj)
return WtSite.ReadPagePackageResult(pages=pages)

class UploadPagePackageParam(model.OswBaseModel):
class UploadPagePackageParam(OswBaseModel):
"""Parameter class for upload_page_package method."""

storage_path: Optional[Union[str, Path]] = None
Expand Down Expand Up @@ -1256,6 +1255,7 @@ def _edit(self, comment: str = None, mode="action-multislot"):
content = json.dumps(content, ensure_ascii=False)
params["slot_" + slot_key] = content
if changed:
self.changed = True
self.wtSite._site.api(
"editslots",
token=self.wtSite._site.get_token("csrf"),
Expand All @@ -1266,8 +1266,10 @@ def _edit(self, comment: str = None, mode="action-multislot"):
self.wtSite._clear_cookies()

else:
changed = False
for slot_key in self._slots:
if self._slots_changed[slot_key]:
changed = True
content = self._slots[slot_key]
if self._content_model[slot_key] == "json":
content = json.dumps(content, ensure_ascii=False)
Expand All @@ -1280,6 +1282,8 @@ def _edit(self, comment: str = None, mode="action-multislot"):
summary=comment,
)
self._slots_changed[slot_key] = False
if changed:
self.changed = True

def delete(self, comment: str = None):
"""Deletes the page from the site
Expand Down Expand Up @@ -1321,7 +1325,7 @@ def get_last_changed_time(self):
self._current_revision["timestamp"].replace("Z", "+00:00")
)

class CopyPageConfig(model.OswBaseModel):
class CopyPageConfig(OswBaseModel):
"""Configuration to copy a page"""

source_site: WtSite
Expand All @@ -1337,7 +1341,7 @@ class CopyPageConfig(model.OswBaseModel):
class Config:
arbitrary_types_allowed = True

class PageCopyResult(model.OswBaseModel):
class PageCopyResult(OswBaseModel):
"""Result of copying a page"""

page: "WtPage"
Expand Down Expand Up @@ -1389,7 +1393,7 @@ def copy(self, config: CopyPageConfig) -> PageCopyResult:
print(s2p)
return WtPage.PageCopyResult(page=self, target_altered=True)

class PageDumpConfig(model.OswBaseModel):
class PageDumpConfig(OswBaseModel):
"""Configuration to dump wiki pages to the file system"""

target_dir: Union[str, Path]
Expand Down Expand Up @@ -1549,15 +1553,15 @@ def purge(self):
"""
self._page.purge()

class ExportConfig(model.OswBaseModel):
class ExportConfig(OswBaseModel):
"""Configuration to export a page to XML"""

full_history: Optional[bool] = True
"""if true, export the full history of the page, else only the current revision"""
include_templates: Optional[bool] = False
"""if true, export the templates used in the page"""

class ExportResult(model.OswBaseModel):
class ExportResult(OswBaseModel):
"""Return type of export_xml"""

xml: str
Expand Down Expand Up @@ -1604,7 +1608,7 @@ def export_xml(
else:
return WtPage.ExportResult(success=True, xml=response.text)

class ImportConfig(model.OswBaseModel):
class ImportConfig(OswBaseModel):
"""Configuration to import a page from XML.
see also https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps"""

Expand All @@ -1629,7 +1633,7 @@ class ImportConfig(model.OswBaseModel):
username_mapping: Optional[Dict[str, str]] = {}
"""mapping of usernames in the XML to usernames in the target instance"""

class ImportResult(model.OswBaseModel):
class ImportResult(OswBaseModel):
"""Return type of import_xml"""

success: bool
Expand Down
146 changes: 133 additions & 13 deletions tests/integration/store_and_load_test.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,154 @@
import mwclient
import sys
from pathlib import Path

import osw.model.entity as model
from osw.core import OSW
from osw.auth import CredentialManager
from osw.core import OSW, AddOverwriteClassOptions, OverwriteOptions
from osw.utils.wiki import get_full_title
from osw.wiki_tools import SearchParam
from osw.wtsite import WtSite

# run with: tox -e test -- --wiki_domain domain --wiki_username user --wiki_password pass


def test_store_and_load(wiki_domain, wiki_username, wiki_password):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Besides this integration test, we should add a unit test for apply_overwrite_policy()

Copy link
Contributor Author

@LukasGold LukasGold Jul 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@SimonStier This implies that apply_overwrite_policy() should be accessible independently of store_entity() - correct?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct =)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

implemented in b073c50
test included with 22bac86

site = mwclient.Site(host=wiki_domain)
site.login(username=wiki_username, password=wiki_password)
wtsite = WtSite(WtSite.WtSiteLegacyConfig(site=site))
cm = CredentialManager()
cm.add_credential(
CredentialManager.UserPwdCredential(
iri=wiki_domain, username=wiki_username, password=wiki_password
)
)
wtsite = WtSite(WtSite.WtSiteConfig(iri=wiki_domain, cred_mngr=cm))
osw = OSW(site=wtsite)

# Check 1: Store an entity and download it again, delete afterward
# Create an item with a label
my_entity = model.Item(label=[model.Label(text="MyItem")])

# Store the item in the OSW
osw.store_entity(my_entity)

# Load the item from the OSW
my_entity2 = osw.load_entity("Item:" + OSW.get_osw_id(my_entity.uuid))

# Check the stored item
assert my_entity.label[0].text == my_entity2.label[0].text

# Delete the item
osw.delete_entity(my_entity)

# Check 2: Store a more complex entity, create a local duplicate, with changed
# properties, test the 'overwrite' param options, delete afterward
# Make non-package scripts available for import
cwd = Path(__file__).parent.absolute()
tests_dir = cwd.parents[1] / "tests"
sys.path.append(str(tests_dir))
# Get required functions
from test_osl import (
check_false,
check_keep_existing,
check_only_empty,
check_replace_remote,
check_true,
)

checks = [
{ # Overwrite properties
"overwrite": OverwriteOptions.true,
"assert": check_true,
},
{ # Do not overwrite properties
"overwrite": OverwriteOptions.false,
"assert": check_false,
},
{ # Overwrite empty properties only
"overwrite": OverwriteOptions.only_empty,
"assert": check_only_empty,
},
{ # Replace the remote entity entirely
"overwrite": AddOverwriteClassOptions.replace_remote,
"assert": check_replace_remote,
},
{ # Keep the existing entity as is
"overwrite": AddOverwriteClassOptions.keep_existing,
"assert": check_keep_existing,
},
]
for check in checks:
# Create a new item with some properties
original_item = model.Item(
label=[model.Label(text="My Item")],
name="MyItem",
iri="", # Empty string property
description=[], # Empty list property
query_label=None, # Equal to a non-existing property
image="File:OSWacacwdcawd.png", # Property not in altered
)
print("Storing original entity...")
osw.store_entity(original_item)
# Alter some of the property values
altered_props = {
"label": [model.Label(text="My Item Duplicate")],
"name": "MyItemDuplicate",
"iri": "http://example.com/MyItemDuplicate",
"description": [model.Label(text="This is a duplicate")],
"query_label": "My Item Duplicate",
"attachments": ["File:OSWacacwdcawd.pdf"], # Property not in original
}
# Create a new item with the altered properties
original_props = {
key: value
for key, value in original_item.dict().items()
if key in ["uuid", "type"]
}
altered_item = model.Item(**{**original_props, **altered_props})
# Update the item in the OSW
print("Storing altered entity...")
osw.store_entity(
param=OSW.StoreEntityParam(
entities=altered_item, overwrite=check["overwrite"]
)
)
# Load the item from the OSW
stored_item = osw.load_entity(
entity_title=OSW.LoadEntityParam(
titles="Item:" + OSW.get_osw_id(original_item.uuid), disable_cache=True
)
).entities[0]
# Check the stored item
check["assert"](original_item, altered_item, stored_item)
# Delete the item
osw.delete_entity(original_item)


def test_query_instances(wiki_domain, wiki_username, wiki_password):
    """Store an entity, query instances of the category of the entity, make sure the
    new entity is contained in the list of returned instances, delete the entity.

    The deletion runs in a ``finally`` clause, so a failing query or assertion
    does not leave the test item behind on the wiki.
    """
    cm = CredentialManager()
    cm.add_credential(
        CredentialManager.UserPwdCredential(
            iri=wiki_domain, username=wiki_username, password=wiki_password
        )
    )
    wtsite = WtSite(WtSite.WtSiteConfig(iri=wiki_domain, cred_mngr=cm))
    osw = OSW(site=wtsite)
    # Create an item with a label
    my_item = model.Item(label=[model.Label(text="MyItem")])
    fpt = get_full_title(my_item)
    # Store the item in the OSW; only once this succeeded there is something
    # to clean up, hence the try block starts afterwards
    osw.store_entity(my_item)
    try:
        # Query instances of the category of the entity
        instances = osw.query_instances(
            category=OSW.QueryInstancesParam(categories="Category:Item", limit=10000)
        )
        assert fpt in instances
    finally:
        # Delete the item, even if the query or the assertion failed
        osw.delete_entity(my_item)


def test_statement_creation(wiki_domain, wiki_username, wiki_password):
site = mwclient.Site(host=wiki_domain)
site.login(username=wiki_username, password=wiki_password)
wtsite = WtSite(WtSite.WtSiteLegacyConfig(site=site))
cm = CredentialManager()
cm.add_credential(
CredentialManager.UserPwdCredential(
iri=wiki_domain, username=wiki_username, password=wiki_password
)
)
wtsite = WtSite(WtSite.WtSiteConfig(iri=wiki_domain, cred_mngr=cm))
osw = OSW(site=wtsite)

my_entity = model.Item(
Expand Down
Loading