Skip to content

Commit 9f19d7a

Browse files
authored
Merge pull request #71 from OpenSemanticLab/70-fix-file-detection
fix: add regex to extract file from editor templates
2 parents a76f0ea + 21ae8d4 commit 9f19d7a

File tree

3 files changed

+125
-0
lines changed

3 files changed

+125
-0
lines changed

src/osw/utils/regex_pattern.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,23 @@
110110
".drawio.png",
111111
],
112112
),
113+
RegExPatternExtended(
114+
description="File uuid in template",
115+
pattern=r"{{\s*(Template:Editor\/(DrawIO|SvgEdit|Kekule|Spreadsheet))[\s\S]*?\|\s*uuid\s*=\s*([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})[\s\S]*?}}",
116+
group_keys=["Full template name", "Editor", "UUID"],
117+
example_str="""{{Template:Editor/DrawIO
118+
| file_name = Semantic triple schematic
119+
| page_name = Item:OSW8bccb1f0123f47d1831a1348ecbe63cc
120+
| uuid = c4171917-ea09-4d98-823a-6af8282a6d50
121+
| full_width = 0
122+
| width = 300px
123+
}}""",
124+
expected_groups=[
125+
"Template:Editor/DrawIO",
126+
"DrawIO",
127+
"c4171917-ea09-4d98-823a-6af8282a6d50",
128+
],
129+
),
113130
RegExPatternExtended(
114131
description="UUID from full page title",
115132
pattern=r"([A-Za-z]+):([A-Z]+)([0-9a-fA-F]{32})((?:\.[\w-]+)*)",

src/osw/wtsite.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from osw.model.static import OswBaseModel
2727
from osw.utils.regex_pattern import REGEX_PATTERN_LIB
2828
from osw.utils.util import parallelize
29+
from osw.utils.wiki import get_osw_id
2930

3031
# Constants
3132
SLOTS = {
@@ -1555,6 +1556,31 @@ def find_file_page_refs_in_slots(self, slots: List[str] = None) -> List[str]:
15551556
"Full page name",
15561557
)
15571558
file_page_refs.extend(full_page_names)
1559+
# find all files in editor templates
1560+
pattern = REGEX_PATTERN_LIB["File uuid in template"]
1561+
res = pattern.finditer(str(content))
1562+
# iterate over all matches
1563+
for match in res:
1564+
ft = None
1565+
# check if the match has the groups "Editor" and "UUID" and UUID is a valid OSW ID
1566+
try:
1567+
if "Editor" in match.groups and "UUID" in match.groups:
1568+
# construct a file page title
1569+
osw_id = get_osw_id(match.groups["UUID"])
1570+
if match.groups["Editor"] == "DrawIO":
1571+
ft = "File:" + osw_id + ".drawio.svg"
1572+
elif match.groups["Editor"] == "SvgEdit":
1573+
ft = "File:" + osw_id + ".svg"
1574+
elif match.groups["Editor"] == "Kekule":
1575+
ft = "File:" + osw_id + ".kekule.json"
1576+
elif match.groups["Editor"] == "Spreadsheet":
1577+
ft = "File:" + osw_id + ".luckysheet.json"
1578+
elif match.groups["Editor"] == "Wellplate":
1579+
ft = "File:" + osw_id + ".wellplate.svg"
1580+
if ft is not None:
1581+
file_page_refs.append(ft)
1582+
except ValueError:
1583+
print("Warning: Error while parsing uuid in editor template")
15581584
return list(set(file_page_refs))
15591585

15601586
def purge(self):

tests/test_osl.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,85 @@ def test_apply_overwrite_policy():
182182
check["assert"](original_item, altered_item, new_item)
183183

184184
# todo: # where do we really need to interact with an OSL instance?
185+
186+
187+
def test_wtpage_get_files():
188+
# create an offline page with test content in slot main
189+
# real-world example from Item:OSW8bccb1f0123f47d1831a1348ecbe63cc (About this platform)
190+
page = OfflineWtPage()
191+
page.set_slot_content(
192+
"main",
193+
"""
194+
195+
====File pages====
196+
The following image gallery displays the relevant elements of a WikiFile page, which are specific to file pages.
197+
{{Template:Viewer/Media
198+
| image_size = 300
199+
| mode = default
200+
| textdata = File:OSW486215598a1f4993b063804775d70716.png{{!}}Example WikiFile page with preview of the file;
201+
File:OSWb4f81db0862d4430b45e6fcbda9fc1ff.PNG{{!}}Infobox of a WikiFile page;
202+
File:OSWb6115f4d5b414a3f8b3dffd420c82c2e.PNG{{!}}Footer of a WikiFile page;
203+
}}
204+
205+
===Semantic triples===
206+
The semantic triple is the basic building block of semantic technology. It links two nodes ("Subject" and "Object") by a "Property" (sometimes called "Predicate"), which expresses the relation between the two, giving meaning to the link.
207+
{{Template:Editor/DrawIO
208+
| file_name = Semantic triple schematic
209+
| page_name = Item:OSW8bccb1f0123f47d1831a1348ecbe63cc
210+
| uuid = c4171917-ea09-4d98-823a-6af8282a6d50
211+
| full_width = 0
212+
| width = 300px
213+
}}{{Template:Viewer/Media
214+
| image_size = 600
215+
| mode = default
216+
| textdata = File:OSW51ad8f9d660641f9880006c40f41cb56.png{{!}}An example for a network of semantic links, describing a publication, one of the authors and his affiliate;
217+
}}
218+
219+
===Ontologies===
220+
Ontologies aren't just vocabularies that define terms linke a dictionary would do. Ontologies structure knowledge by defining concepts and the relations among them. Often, they involve a hierarchy, which springs from a very generic object, aiming to describe almost everything, like "Thing" in {{Template:Viewer/Link|page=|url=https://schema.org/docs/full.html|label=Schema.org}} or "Entity" within this [[:Category:Entity#Subcategories|platform]]. Use the > Symbol to expand different hierarchy levels and to explore the structure.
221+
222+
There are many ontologies in use and even more in development. Most scientific domains have their own, often multiples. Here are some prominent examples:
223+
224+
*{{Template:Viewer/Link|page=|url=https://emmo-repo.github.io/|label=The Elementary Multiperspective Material Ontology (EMMO)}}
225+
* {{Template:Viewer/Link|page=|url=https://big-map.github.io/BattINFO/index.html|label=Battery Interface Ontology (BattINFO)}}
226+
227+
==Object Orientation==
228+
Object Orientation (OO) is a theoretical concept in computer science. It uses the abstract concept of objects to describe and model real-world objects.
229+
230+
===Basic building blocks===
231+
*Object
232+
233+
234+
{{Template:Editor/DrawIO
235+
| file_name = Basic building blocks of Object Orientation diagramm
236+
| page_name = Item:OSW8bccb1f0123f47d1831a1348ecbe63cc
237+
| uuid = 0bea84d5-4c07-4374-a4b4-5dc84d9ba302
238+
| full_width = 0
239+
| width = 200px
240+
}}
241+
242+
===Linked Data===
243+
Object oriented linked data (OO-LD) in Open Semantic Lab is our way to leverage the functionality of linked data while employing concepts of object orientation to avoid the redundant definition of (semantic) properties of objects and mapping of semantic properties to ontologies.
244+
245+
The following figure sketches how Open Semantic Lab is used to modell objects in the real-world. On the left we see a hierarchy of abstract concepts, starting from the most generic at the top and ending up at the most specific at the bottom. At each level either new properties are introduced or the range of property values is reduced to a certain set to account for the specifics of a category. A category or class is used to define and bundle all properties or attributes that items or instances of a certain class have in common. {{Template:Editor/DrawIO
246+
| file_name = Object oriented linked data in OSL diagramm
247+
| page_name = Item:OSW8bccb1f0123f47d1831a1348ecbe63cc
248+
| uuid = 58baa09e-c00b-42cc-b077-9fe4d58ccf82
249+
| width = 600px
250+
}}
251+
252+
==Glossary of frequently used terms==
253+
""",
254+
)
255+
256+
file_list = page.find_file_page_refs_in_slots()
257+
print(file_list)
258+
assert len(file_list) == 7
259+
# assert that the file list contains the expected files
260+
assert "File:OSW486215598a1f4993b063804775d70716.png" in file_list
261+
assert "File:OSWb4f81db0862d4430b45e6fcbda9fc1ff.PNG" in file_list
262+
assert "File:OSWb6115f4d5b414a3f8b3dffd420c82c2e.PNG" in file_list
263+
assert "File:OSWc4171917ea094d98823a6af8282a6d50.drawio.svg" in file_list
264+
assert "File:OSW51ad8f9d660641f9880006c40f41cb56.png" in file_list
265+
assert "File:OSW0bea84d54c074374a4b45dc84d9ba302.drawio.svg" in file_list
266+
assert "File:OSW58baa09ec00b42ccb0779fe4d58ccf82.drawio.svg" in file_list

0 commit comments

Comments
 (0)