Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
6b021be
README update
LukasGold Oct 25, 2024
c7d7c2a
import_with_fallback implemented and some reformatting
LukasGold Oct 25, 2024
81b3d66
Doc string reformatting
LukasGold Nov 6, 2024
d7824fc
Reformatting
LukasGold Nov 6, 2024
2cbac21
Bug fixes and doc improvements to express.py
LukasGold Nov 6, 2024
df5cecd
Fix import_with_fallback()
LukasGold Nov 6, 2024
22bbbaf
Improvements to usability and readability
LukasGold Nov 6, 2024
01f0b1d
Fix cases where the default_factory would not be present in the inher…
LukasGold Nov 7, 2024
c104e39
Fix: import_with_fallback() & Context Manager for osw_download_file()
LukasGold Nov 15, 2024
91bd5a2
Moving some functionality from functions in osw.utils.wiki to methods…
LukasGold Nov 15, 2024
8539b95
Making SearchParam available at WtSite.SearchParam
LukasGold Nov 15, 2024
b7222f3
Making openpyxl a requirement for dataimport
LukasGold Nov 15, 2024
5b5060c
Improvements to import_utility.py - potentially breaking some functio…
LukasGold Nov 15, 2024
13acb67
Renaming of casted -> transformed
LukasGold Nov 16, 2024
76bec5e
Fixing failing tests due to missing methods (on classes)
LukasGold Nov 17, 2024
f63df22
Moving NoneType to a separate module
LukasGold Nov 18, 2024
df680e0
Validate attributes of OswBaseModel on assignment
LukasGold Nov 18, 2024
57957a1
Fixing validation errors, causing tests to fail
LukasGold Nov 18, 2024
2967d43
A collection of the param objects defined in the package
LukasGold Nov 18, 2024
632a0a1
Introducing change id and StoreEntityResult
LukasGold Nov 18, 2024
a42fddd
Resolving circular import due to identical naming of osw.types
LukasGold Nov 18, 2024
ee3def9
Minor improvements to express functions
LukasGold Nov 18, 2024
b279ebf
Small fix to osw.core.OSW.StoreEntityParam
LukasGold Nov 18, 2024
beffddf
Still using pydantic.v1
LukasGold Nov 18, 2024
3f1d047
Fixes to QueryInstancesParam & some readability improvements
LukasGold Nov 19, 2024
d908a41
Fixing validation error (introduced earlier)
LukasGold Nov 19, 2024
81c51d2
Accompanying changes in the Category:Entity schema
LukasGold Nov 20, 2024
8d61b9c
Deleting empty strings from jsondata to avoid validation errors
LukasGold Nov 21, 2024
2af7054
Added method to delete a list of pages
LukasGold Nov 21, 2024
1b2bf1f
Fixing params.py: Enums can't be inherited, added new param object
LukasGold Nov 21, 2024
e22b0e4
Applying template for Entity.name & Introducing Meta.uuid
LukasGold Nov 21, 2024
665a189
Merge pull request #93 from OpenSemanticLab/feat-add-rdf-generation
LukasGold Nov 21, 2024
c434777
Fixing: length of dictionary changed during iteration
LukasGold Nov 22, 2024
f8959d2
apply_overwrite_policy() should apply the removal of empty strings
LukasGold Nov 22, 2024
0d9b87b
Fix: Validation error for Entity.label
LukasGold Nov 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
[![PyPI-Server](https://img.shields.io/pypi/v/osw.svg)](https://pypi.org/project/osw/)
[![DOI](https://zenodo.org/badge/458130867.svg)](https://zenodo.org/badge/latestdoi/458130867)
[![Coveralls](https://img.shields.io/coveralls/github/OpenSemanticLab/osw-python/main.svg)](https://coveralls.io/r/<USER>/osw)
[![Coveralls](https://img.shields.io/coveralls/github/OpenSemanticLab/osw-python/main.svg)](https://coveralls.io/r/OpenSemanticLab/osw)
[![docs](xx.xx)](https://opensemanticlab.github.io/osw-python/)
![license](https://img.shields.io/github/license/OpenSemanticLab/osw-python.svg)
<!-- [![Pydantic v2](https://img.shields.io/endpoint?url=https://raw.githubusercontent.
com/pydantic/pydantic/main/docs/badge/v2.json)](https://pydantic.dev) -->
[![Project generated with PyScaffold](https://img.shields.io/badge/-PyScaffold-005CA0?logo=pyscaffold)](https://pyscaffold.org/)

# osw

Python toolset for data processing, queries, wikicode generation and page manipulation within OpenSemanticLab.
General features for object oriented interaction with knowledge graphs are planned to be moved to a standalone package: [oold-python](https://github.com/OpenSemanticWorld/oold-python)

General features for object-oriented interaction with knowledge graphs are planned to be moved to a standalone package:
[oold-python](https://github.com/OpenSemanticWorld/oold-python)

## Installation
```
Expand Down
1 change: 0 additions & 1 deletion examples/use_express_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
overwrite=True, # Required if file already exists
)
local_file_path = local_file.path
local_file.close() # required to release the file lock

# Open a file with context manager directly from an OSW instance
with osw_download_file(
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ S3 =
dataimport =
geopy
deepl
openpyxl
UI =
pysimplegui
all =
Expand Down
4 changes: 3 additions & 1 deletion src/osw/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
from warnings import warn

import yaml
from pydantic.v1 import FilePath, PrivateAttr
from pydantic.v1 import PrivateAttr

from osw.custom_types import FilePath
from osw.model.static import OswBaseModel

CREDENTIALS_FN_DEFAULT = "credentials.pwd.yaml"
Expand Down Expand Up @@ -254,6 +255,7 @@ def save_credentials_to_file(
if self.cred_filepath is None:
filepath_ = [Path.cwd() / CREDENTIALS_FN_DEFAULT]
if set_cred_filepath:
# Creates error if file does not exist -> Using custom FilePath
self.cred_filepath = filepath_
for fp in filepath_:
file = Path(fp)
Expand Down
3 changes: 2 additions & 1 deletion src/osw/controller/file/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from osw.controller.file.base import FileController
from osw.core import model

# TODO: add addional remove file with https://docs.prefect.io/2.11.4/concepts/filesystems/
# TODO: add additional remote file with
# https://docs.prefect.io/2.11.4/concepts/filesystems/


# Note: the order of the base classes is important
Expand Down
115 changes: 79 additions & 36 deletions src/osw/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from copy import deepcopy
from enum import Enum
from typing import Any, Dict, List, Optional, Type, Union
from uuid import UUID
from uuid import UUID, uuid4
from warnings import warn

import rdflib
Expand All @@ -31,6 +31,7 @@
get_uuid,
is_empty,
namespace_from_full_title,
remove_empty_strings,
title_from_full_title,
)
from osw.wiki_tools import SearchParam
Expand Down Expand Up @@ -604,6 +605,8 @@ class LoadEntityParam(BaseModel):
autofetch_schema: Optional[bool] = True
"""If true, load the corresponding schemas /
categories ad-hoc if not already present"""
remove_empty_strings: Optional[bool] = True
"""If true, remove key with an empty string value from the jsondata."""
disable_cache: bool = False
"""If true, disable the cache for the loading process"""

Expand Down Expand Up @@ -659,6 +662,8 @@ def load_entity(
schemas = []
schemas_fetched = True
jsondata = page.get_slot_content("jsondata")
if param.remove_empty_strings:
remove_empty_strings(jsondata)
if jsondata:
for category in jsondata["type"]:
schema = (
Expand All @@ -668,19 +673,19 @@ def load_entity(
)
schemas.append(schema)
# generate model if not already exists
cls = schema["title"]
if not hasattr(model, cls):
cls_name: str = schema["title"]
if not hasattr(model, cls_name):
if param.autofetch_schema:
self.fetch_schema(
OSW.FetchSchemaParam(
schema_title=category, mode="append"
)
)
if not hasattr(model, cls):
if not hasattr(model, cls_name):
schemas_fetched = False
print(
f"Error: Model {cls} not found. Schema {category} needs to "
f"be fetched first."
f"Error: Model {cls_name} not found. Schema {category} "
f"needs to be fetched first."
)
if not schemas_fetched:
continue
Expand All @@ -689,7 +694,7 @@ def load_entity(
print("Error: no schema defined")

elif len(schemas) == 1:
cls = getattr(model, schemas[0]["title"])
cls: Type[model.Entity] = getattr(model, schemas[0]["title"])
entity: model.Entity = cls(**jsondata)

else:
Expand All @@ -700,7 +705,7 @@ def load_entity(
entity: model.Entity = cls(**jsondata)

if entity is not None:
# make sure we do not override existing meta data
# make sure we do not override existing metadata
if not hasattr(entity, "meta") or entity.meta is None:
entity.meta = model.Meta()
if (
Expand Down Expand Up @@ -775,6 +780,7 @@ class _ApplyOverwriteParam(OswBaseModel):
namespace: Optional[str]
meta_category_title: Optional[str]
meta_category_template_str: Optional[str]
remove_empty_strings: Optional[bool] = True
inplace: Optional[bool] = False
debug: Optional[bool] = False

Expand Down Expand Up @@ -840,8 +846,10 @@ def set_content(content_to_set: dict) -> None:
page.set_slot_content(slot_, content_to_set[slot_])

# Create a variable to hold the new content
new_content = { # required for json parsing and header rendering
"header": "{{#invoke:Entity|header}}", # required for footer rendering
new_content = {
# required for json parsing and header rendering
"header": "{{#invoke:Entity|header}}",
# required for footer rendering
"footer": "{{#invoke:Entity|footer}}",
}
# Take the shortcut if
Expand All @@ -853,6 +861,8 @@ def set_content(content_to_set: dict) -> None:
):
# Use pydantic serialization, skip none values:
new_content["jsondata"] = json.loads(param.entity.json(exclude_none=True))
if param.remove_empty_strings:
remove_empty_strings(new_content["jsondata"])
set_content(new_content)
page.changed = True
return page # Guard clause --> exit function
Expand Down Expand Up @@ -880,9 +890,10 @@ def set_content(content_to_set: dict) -> None:
remote_content = {}
# Get the remote content
for slot in ["jsondata", "header", "footer"]: # SLOTS:
remote_content[slot] = page.get_slot_content(
slot
) # Todo: remote content does not contain properties that are not set
remote_content[slot] = page.get_slot_content(slot)
# Todo: remote content does not contain properties that are not set
if param.remove_empty_strings:
remove_empty_strings(remote_content["jsondata"])
if remote_content["header"]: # not None or {} or ""
new_content["header"] = remote_content["header"]
if remote_content["footer"]:
Expand All @@ -893,6 +904,8 @@ def set_content(content_to_set: dict) -> None:
# Properties that are not set in the local content will be set to None
# We want those not to be listed as keys
local_content["jsondata"] = json.loads(param.entity.json(exclude_none=True))
if param.remove_empty_strings:
remove_empty_strings(local_content["jsondata"])
if param.debug:
print(f"'local_content': {str(remote_content)}")
# Apply the overwrite logic
Expand Down Expand Up @@ -968,6 +981,12 @@ class StoreEntityParam(OswBaseModel):
"""A list of OverwriteClassParam objects. If a class specific overwrite setting
is set, this setting is used.
"""
remove_empty_strings: Optional[bool] = True
"""If true, remove key with an empty string value from the jsondata."""
change_id: Optional[str] = None
"""ID to document the change. Entities within the same store_entity() call will
share the same change_id. This parameter can also be used to link multiple
store_entity() calls."""
meta_category_title: Optional[str] = "Category:Category"
debug: Optional[bool] = False
_overwrite_per_class: Dict[str, Dict[str, OSW.OverwriteClassParam]] = (
Expand All @@ -980,6 +999,15 @@ def __init__(self, **data):
super().__init__(**data)
if not isinstance(self.entities, list):
self.entities = [self.entities]
if self.change_id is None:
self.change_id = str(uuid4())
for entity in self.entities:
if getattr(entity, "meta", None) is None:
entity.meta = model.Meta()
if entity.meta.change_id is None:
entity.meta.change_id = []
if self.change_id not in entity.meta.change_id:
entity.meta.change_id.append(self.change_id)
if len(self.entities) > 5 and self.parallel is None:
self.parallel = True
if self.parallel is None:
Expand All @@ -1005,9 +1033,15 @@ def __init__(self, **data):
self._overwrite_per_class["by name"][model_name] = param
self._overwrite_per_class["by type"][model_type] = param

class StoreEntityResult(OswBaseModel):
    """Return value of store_entity(), carrying the ID that documents the change."""

    change_id: str
    """The change ID shared by the entities stored in the store_entity() call"""

def store_entity(
self, param: Union[StoreEntityParam, OswBaseModel, List[OswBaseModel]]
) -> None:
) -> StoreEntityResult:
"""stores the given dataclass instance as OSW page by calling BaseModel.json()

Parameters
Expand Down Expand Up @@ -1067,22 +1101,26 @@ def store_entity_(
namespace=namespace_,
policy=overwrite_class_param,
meta_category_template_str=meta_category_template_str,
remove_empty_strings=param.remove_empty_strings,
debug=param.debug,
)
)
if meta_category_template:
try:
jsondata = page.get_slot_content("jsondata")
if param.remove_empty_strings:
remove_empty_strings(jsondata)
schema_str = eval_compiled_handlebars_template(
meta_category_template,
page.get_slot_content("jsondata"),
jsondata,
{
"_page_title": entity_title, # legacy
"_page_title": entity_title, # Legacy
"_current_subject_": entity_title,
},
)
schema = json.loads(schema_str)
# put generated schema in definitions section
# currently only enabled for Characteristics
# Put generated schema in definitions section,
# currently only enabled for Characteristics
if hasattr(model, "CharacteristicType") and isinstance(
entity_, model.CharacteristicType
):
Expand Down Expand Up @@ -1170,6 +1208,7 @@ def handle_upload_object_(upload_object: UploadObject) -> None:
handle_upload_object_(upload_object)
for upload_object in upload_object_list
]
return OSW.StoreEntityResult(change_id=param.change_id)

class DeleteEntityParam(OswBaseModel):
entities: Union[OswBaseModel, List[OswBaseModel]]
Expand All @@ -1187,36 +1226,38 @@ def __init__(self, **data):
self.parallel = False

def delete_entity(
self, entity: Union[OswBaseModel, DeleteEntityParam], comment: str = None
self,
entity: Union[OswBaseModel, List[OswBaseModel], DeleteEntityParam],
comment: str = None,
):
"""Deletes the given entity/entities from the OSW instance."""
if not isinstance(entity, OSW.DeleteEntityParam):
entity = OSW.DeleteEntityParam(entities=entity)
if comment is not None:
entity.comment = comment

def delete_entity_(entity, comment_: str = None):
def delete_entity_(entity_, comment_: str = None):
"""Deletes the given entity from the OSW instance.

Parameters
----------
entity:
entity_:
The dataclass instance to delete
comment_:
Command for the change log, by default None
"""
title_ = None
namespace_ = None
if hasattr(entity, "meta"):
if entity.meta and entity.meta.wiki_page:
if entity.meta.wiki_page.title:
title_ = entity.meta.wiki_page.title
if entity.meta.wiki_page.namespace:
namespace_ = entity.meta.wiki_page.namespace
if hasattr(entity_, "meta"):
if entity_.meta and entity_.meta.wiki_page:
if entity_.meta.wiki_page.title:
title_ = entity_.meta.wiki_page.title
if entity_.meta.wiki_page.namespace:
namespace_ = entity_.meta.wiki_page.namespace
if namespace_ is None:
namespace_ = get_namespace(entity)
namespace_ = get_namespace(entity_)
if title_ is None:
title_ = OSW.get_osw_id(entity.uuid)
title_ = OSW.get_osw_id(entity_.uuid)
if namespace_ is None or title_ is None:
print("Error: Unsupported entity type")
return
Expand All @@ -1242,7 +1283,9 @@ def delete_entity_(entity, comment_: str = None):
_ = [delete_entity_(e, entity.comment) for e in entity.entities]

class QueryInstancesParam(OswBaseModel):
categories: Union[Union[str, OswBaseModel], List[Union[str, OswBaseModel]]]
categories: Union[
Union[str, Type[OswBaseModel]], List[Union[str, Type[OswBaseModel]]]
]
parallel: Optional[bool] = None
debug: Optional[bool] = False
limit: Optional[int] = 1000
Expand All @@ -1251,20 +1294,20 @@ class QueryInstancesParam(OswBaseModel):

@staticmethod
def get_full_page_name_parts(
category_: Union[str, OswBaseModel]
category_: Union[str, Type[OswBaseModel]]
) -> Dict[str, str]:
error_msg = (
f"Category must be a string like 'Category:<category name>' or a "
f"dataclass instance with a 'type' attribute. This error occurred on "
f"dataclass subclass with a 'type' attribute. This error occurred on "
f"'{str(category_)}'"
)
if isinstance(category_, str):
string_to_split = category_
elif isinstance(category_, OswBaseModel):
type_ = getattr(category_, "type", None)
if type_ is None:
elif issubclass(category_, OswBaseModel):
type_ = category_.__fields__.get("type")
if getattr(type_, "default", None) is None:
raise TypeError(error_msg)
string_to_split = type_[0]
string_to_split = type_.default[0]
else:
raise TypeError(error_msg)
if "Category:" not in string_to_split:
Expand Down
23 changes: 23 additions & 0 deletions src/osw/custom_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import sys
from pathlib import Path

from pydantic.v1.types import FilePath as PydanticFilePath
from pydantic.v1.validators import path_validator # , path_exists_validator

# `types.NoneType` only exists on Python 3.10+; older interpreters get the
# equivalent object derived from `None` itself.
if sys.version_info >= (3, 10):
    from types import NoneType  # noqa: F401
else:
    NoneType = type(None)


class FilePath(PydanticFilePath):
    """Variant of pydantic's ``FilePath`` that also accepts non-existing paths.

    The value still goes through ``path_validator``, but nothing checks that
    the path exists on disk.
    """

    @classmethod
    def __get_validators__(cls):
        """Yield the validators pydantic should apply to this type, in order."""
        # `path_exists_validator` is deliberately left out so that paths
        # pointing to files that do not exist (yet) pass validation.
        yield from (path_validator, cls.validate)

    @classmethod
    def validate(cls, value: Path) -> Path:
        """Return ``value`` unchanged; no checks are made on the path."""
        return value
Loading
Loading