Skip to content

Commit

Permalink
split proceedings query by moving sub-query to its own query (#42 & #45)
Browse files Browse the repository at this point in the history
  • Loading branch information
tholzheim committed Dec 19, 2022
1 parent ec4c7e6 commit 2fc084f
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 34 deletions.
56 changes: 29 additions & 27 deletions ceurws/resources/queries/ceurws.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT DISTINCT
?item
?itemLabel
?itemDescription
Expand Down Expand Up @@ -68,34 +70,34 @@
?language_of_work_or_name rdfs:label ?language_of_work_or_nameLabel.
FILTER((LANG(?language_of_work_or_nameLabel)) = "en")
}
{
SELECT
?item
(GROUP_CONCAT(?_event; SEPARATOR = "|") AS ?event)
(GROUP_CONCAT(?_eventLabel; SEPARATOR = "|") AS ?eventLabel)
(GROUP_CONCAT(?_eventSeries; SEPARATOR = "|") AS ?eventSeries)
(GROUP_CONCAT(?_eventSeriesLabel; SEPARATOR = "|") AS ?eventSeriesLabel)
(GROUP_CONCAT(?_eventSeriesOrdinal; SEPARATOR = "|") AS ?eventSeriesOrdinal)
(GROUP_CONCAT(?_dblpEventId; SEPARATOR = "|") AS ?dblpEventId)
WHERE {
?item wdt:P31 wd:Q1143604;
wdt:P179 wd:Q27230297;
wdt:P4745 ?_event.
?_event rdfs:label ?_eventLabel.
FILTER((LANG(?_eventLabel)) = "en")
OPTIONAL { ?_event wdt:P10692 ?_dblpEventId. }
OPTIONAL {
?_event p:P179 ?_partOfTheEventSeriesStmt.
?_partOfTheEventSeriesStmt ps:P179 ?_eventSeries;
pq:P1545 ?_eventSeriesOrdinal.
?_eventSeries rdfs:label ?_eventSeriesLabel.
FILTER((LANG(?_eventSeriesLabel)) = "en")
}
}
GROUP BY ?item
}
}
ORDER BY ?sVolume
# EventsByProceeding: one result row per proceedings item (?item) with the
# events linked to it via wdt:P4745 and, where available, each event's
# P10692 value (bound to ?dblpEventId) and its P179 series statement
# (series, series label, P1545 ordinal qualifier).
# All multi-valued columns are "|"-separated strings built with GROUP_CONCAT.
# NOTE(review): GROUP_CONCAT skips unbound values and repeats a value once per
# OPTIONAL match, so positions in the different "|" lists are presumably not
# aligned with each other - confirm before splitting them positionally.
'EventsByProceeding':
  'sparql': |
    # Prefixes are declared explicitly so the query does not depend on the
    # endpoint predefining the Wikidata namespaces.
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX p: <http://www.wikidata.org/prop/>
    PREFIX ps: <http://www.wikidata.org/prop/statement/>
    PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    # GROUP BY ?item already yields exactly one row per item, so no DISTINCT is needed.
    SELECT
      ?item
      (GROUP_CONCAT(?_event; SEPARATOR = "|") AS ?event)
      (GROUP_CONCAT(?_eventLabel; SEPARATOR = "|") AS ?eventLabel)
      (GROUP_CONCAT(?_eventSeries; SEPARATOR = "|") AS ?eventSeries)
      (GROUP_CONCAT(?_eventSeriesLabel; SEPARATOR = "|") AS ?eventSeriesLabel)
      (GROUP_CONCAT(?_eventSeriesOrdinal; SEPARATOR = "|") AS ?eventSeriesOrdinal)
      (GROUP_CONCAT(?_dblpEventId; SEPARATOR = "|") AS ?dblpEventId)
    WHERE {
      # same item selection as the former sub-query of the proceedings query
      ?item wdt:P31 wd:Q1143604;
            wdt:P179 wd:Q27230297;
            wdt:P4745 ?_event.
      # only events that have an English label are returned
      ?_event rdfs:label ?_eventLabel.
      FILTER((LANG(?_eventLabel)) = "en")
      OPTIONAL { ?_event wdt:P10692 ?_dblpEventId. }
      # series membership is read from the full statement to reach the ordinal qualifier
      OPTIONAL {
        ?_event p:P179 ?_partOfTheEventSeriesStmt.
        ?_partOfTheEventSeriesStmt ps:P179 ?_eventSeries;
                                   pq:P1545 ?_eventSeriesOrdinal.
        ?_eventSeries rdfs:label ?_eventSeriesLabel.
        FILTER((LANG(?_eventSeriesLabel)) = "en")
      }
    }
    GROUP BY ?item
'SubmittedByGt6':
sql: |
SELECT count(*) as count,submittedBy
Expand Down
25 changes: 18 additions & 7 deletions ceurws/wikidatasync.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,33 +160,44 @@ def getWikidataEventRecord(self, volume: Volume):
record["instanceOf"] = [instanceOf, "Q7935096"]
return record

def update(self, withStore: bool = True):
    """
    Update my table from the Wikidata Proceedings SPARQL query.

    The proceedings query and the "EventsByProceeding" query are run
    separately; the event columns are then merged into the proceedings
    record that has the same "item" value. The "Proceedings" table is
    re-created (withDrop=True) from the merged records.

    Args:
        withStore(bool): if True store the records (one per URN_NBN) in the SQL database

    Returns:
        list: the proceedings records including the merged event columns
    """
    if self.debug:
        print(f"Querying proceedings from {self.baseurl} ...")
    # query proceedings
    wd_proceedings_records: List[dict] = self.sparql.queryAsListOfDicts(self.wdQuery.query)
    # query events
    event_query = self.qm.queriesByName["EventsByProceeding"]
    wd_event_records: List[dict] = self.sparql.queryAsListOfDicts(event_query.query)
    # add events to proceeding records (records without events stay untouched)
    proceedings_event_map, _duplicates = LOD.getLookup(wd_event_records, "item")
    for proceedings_record in wd_proceedings_records:
        event_record = proceedings_event_map.get(proceedings_record.get("item"))
        if event_record is not None:
            # merge by mapping instead of **kwargs so non-string keys cannot raise
            proceedings_record.update(event_record)
    primaryKey = "URN_NBN"
    withCreate = True
    withDrop = True
    entityInfo = self.sqldb.createTable(
        wd_proceedings_records,
        "Proceedings",
        primaryKey,
        withCreate,
        withDrop,
        sampleRecordCount=5000,
        failIfTooFew=False
    )
    # the lookup keeps one record per URN_NBN and reports the rest as duplicates
    procsByURN, duplicates = LOD.getLookup(wd_proceedings_records, 'URN_NBN')
    if withStore:
        self.sqldb.store(procsByURN.values(), entityInfo, executeMany=True, fixNone=True)
    if len(duplicates) > 0:
        print(f"found {len(duplicates)} duplicates URN entries")
        if len(duplicates) < 10:
            print(duplicates)
    return wd_proceedings_records

def loadProceedingsFromCache(self):
'''
Expand Down

0 comments on commit 2fc084f

Please sign in to comment.