-
Notifications
You must be signed in to change notification settings - Fork 2
solr reindexing through events #873
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dtq-dev
Are you sure you want to change the base?
Changes from all commits
09dbc97
d2e23f5
173e1a3
d227956
1d953cb
dd9dba4
247099e
0134338
9a83916
d437b4e
bccc4fd
a208d99
6a652eb
60d2592
01d6986
8c7fb77
754b0b3
1883949
3251826
4a77150
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,13 +21,15 @@ | |
| import org.dspace.authorize.dao.ResourcePolicyDAO; | ||
| import org.dspace.authorize.service.ResourcePolicyService; | ||
| import org.dspace.content.DSpaceObject; | ||
| import org.dspace.content.Item; | ||
| import org.dspace.content.factory.ContentServiceFactory; | ||
| import org.dspace.core.Constants; | ||
| import org.dspace.core.Context; | ||
| import org.dspace.core.ProvenanceService; | ||
| import org.dspace.eperson.EPerson; | ||
| import org.dspace.eperson.Group; | ||
| import org.dspace.eperson.service.GroupService; | ||
| import org.dspace.event.Event; | ||
| import org.springframework.beans.factory.annotation.Autowired; | ||
|
|
||
| /** | ||
|
|
@@ -55,8 +57,6 @@ public class ResourcePolicyServiceImpl implements ResourcePolicyService { | |
| @Autowired | ||
| ProvenanceService provenanceService; | ||
|
|
||
| @Autowired | ||
| ResourcePolicyService resourcePolicyService; | ||
|
|
||
| protected ResourcePolicyServiceImpl() { | ||
| } | ||
|
|
@@ -154,6 +154,7 @@ public List<ResourcePolicy> findByTypeGroupActionExceptId(Context context, DSpac | |
| public void delete(Context context, ResourcePolicy resourcePolicy) throws SQLException, AuthorizeException { | ||
| // FIXME: authorizations | ||
| // Remove ourself | ||
| DSpaceObject dso = resourcePolicy.getdSpaceObject(); | ||
| resourcePolicyDAO.delete(context, resourcePolicy); | ||
|
|
||
| context.turnOffAuthorisationSystem(); | ||
|
|
@@ -163,6 +164,7 @@ public void delete(Context context, ResourcePolicy resourcePolicy) throws SQLExc | |
| .updateLastModified(context, resourcePolicy.getdSpaceObject()); | ||
| } | ||
| context.restoreAuthSystemState(); | ||
| addEventModify(context, dso); | ||
| } | ||
|
|
||
|
|
||
|
|
@@ -226,6 +228,7 @@ public ResourcePolicy clone(Context context, ResourcePolicy resourcePolicy) | |
| clone.setRpType((String) ObjectUtils.clone(resourcePolicy.getRpType())); | ||
| clone.setRpDescription((String) ObjectUtils.clone(resourcePolicy.getRpDescription())); | ||
| update(context, clone); | ||
| DSpaceObject dso = resourcePolicy.getdSpaceObject(); | ||
| return clone; | ||
| } | ||
|
|
||
|
|
@@ -235,6 +238,7 @@ public void removeAllPolicies(Context c, DSpaceObject o) throws SQLException, Au | |
| c.turnOffAuthorisationSystem(); | ||
| contentServiceFactory.getDSpaceObjectService(o).updateLastModified(c, o); | ||
| c.restoreAuthSystemState(); | ||
| addEventModify(c, o); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -243,20 +247,20 @@ public void removePolicies(Context c, DSpaceObject o, String type) throws SQLExc | |
| c.turnOffAuthorisationSystem(); | ||
| contentServiceFactory.getDSpaceObjectService(o).updateLastModified(c, o); | ||
| c.restoreAuthSystemState(); | ||
| addEventModify(c, o); | ||
| } | ||
|
|
||
| @Override | ||
| public void removePolicies(Context c, DSpaceObject o, String type, int action) | ||
| throws SQLException, AuthorizeException { | ||
| // Get all read policies of the dso before removing them | ||
| List<ResourcePolicy> resPolicies = resourcePolicyService.find(c, o, type); | ||
|
|
||
| List<ResourcePolicy> resPolicies = find(c, o, type); | ||
| resourcePolicyDAO.deleteByDsoAndTypeAndAction(c, o, type, action); | ||
| c.turnOffAuthorisationSystem(); | ||
| contentServiceFactory.getDSpaceObjectService(o).updateLastModified(c, o); | ||
| c.restoreAuthSystemState(); | ||
|
|
||
milanmajchrak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| provenanceService.removeReadPolicies(c, o, resPolicies); | ||
| addEventModify(c, o); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -266,6 +270,7 @@ public void removeDsoGroupPolicies(Context context, DSpaceObject dso, Group grou | |
| context.turnOffAuthorisationSystem(); | ||
| contentServiceFactory.getDSpaceObjectService(dso).updateLastModified(context, dso); | ||
| context.restoreAuthSystemState(); | ||
| addEventModify(context, dso); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -285,6 +290,10 @@ public void removeAllEPersonPolicies(Context context, EPerson ePerson) throws SQ | |
|
|
||
| /** | ||
| * Removes every resource policy that belongs to the given group. | ||
| * Before the policies are deleted, a modify event is requested (via addEventModify) for the | ||
| * object each policy is attached to. | ||
| * | ||
| * @param c DSpace context | ||
| * @param group group whose policies are removed | ||
| * @throws SQLException passed on from the policy lookup or the delete | ||
| */ | ||
| @Override | ||
| public void removeGroupPolicies(Context c, Group group) throws SQLException { | ||
| // The policies must be read first: once deleted, their target objects can no longer be looked up. | ||
| for (ResourcePolicy policy : find(c, group)) { | ||
| DSpaceObject target = policy.getdSpaceObject(); | ||
| addEventModify(c, target); | ||
| } | ||
| resourcePolicyDAO.deleteByGroup(c, group); | ||
| } | ||
|
|
||
|
|
@@ -297,6 +306,7 @@ public void removePolicies(Context c, DSpaceObject o, int actionId) throws SQLEx | |
| c.turnOffAuthorisationSystem(); | ||
| contentServiceFactory.getDSpaceObjectService(o).updateLastModified(c, o); | ||
| c.restoreAuthSystemState(); | ||
| addEventModify(c, o); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -307,6 +317,7 @@ public void removeDsoAndTypeNotEqualsToPolicies(Context c, DSpaceObject o, Strin | |
| c.turnOffAuthorisationSystem(); | ||
| contentServiceFactory.getDSpaceObjectService(o).updateLastModified(c, o); | ||
| c.restoreAuthSystemState(); | ||
| addEventModify(c, o); | ||
| } | ||
|
|
||
|
|
||
|
|
@@ -338,6 +349,7 @@ public void update(Context context, List<ResourcePolicy> resourcePolicies) throw | |
|
|
||
| // FIXME: Check authorisation | ||
| resourcePolicyDAO.save(context, resourcePolicy); | ||
| addEventModify(context, resourcePolicy.getdSpaceObject()); | ||
| } | ||
|
|
||
| //Update the last modified timestamp of all related DSpace Objects | ||
|
|
@@ -436,4 +448,12 @@ public boolean isMyResourcePolicy(Context context, EPerson eperson, Integer id) | |
| } | ||
| return isMy; | ||
| } | ||
|
|
||
| /** | ||
| * Adds a {@code Event.MODIFY} event to the context when the given object is an {@link Item}. | ||
| * Any other object, including {@code null}, is ignored. | ||
| * The item is passed as the event's object (type {@code Constants.ITEM}); the event's subject | ||
| * is left unset (type {@code -1}, id {@code null}). | ||
| * NOTE(review): consumers that filter on the subject type will see {@code -1} here - confirm | ||
| * that the intended consumers accept such events. | ||
| * | ||
| * @param context context the event is added to | ||
| * @param dso object whose policies were changed | ||
| */ | ||
| public void addEventModify(Context context, DSpaceObject dso) { | ||
| if (!(dso instanceof Item)) { | ||
| return; | ||
| } | ||
| // getID() is available on DSpaceObject, so no cast to Item is needed. | ||
| context.addEvent(new Event(Event.MODIFY, -1, null, | ||
| Constants.ITEM, dso.getID(), "")); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,172 @@ | ||
| /** | ||
| * The contents of this file are subject to the license and copyright | ||
| * detailed in the LICENSE and NOTICE files at the root of the source | ||
| * tree and available online at | ||
| * | ||
| * http://www.dspace.org/license/ | ||
| */ | ||
| package org.dspace.event; | ||
|
|
||
| import java.sql.SQLException; | ||
| import java.util.HashSet; | ||
| import java.util.Iterator; | ||
| import java.util.Objects; | ||
| import java.util.Set; | ||
|
|
||
| import org.apache.log4j.Logger; | ||
| import org.dspace.content.Bitstream; | ||
| import org.dspace.content.Bundle; | ||
| import org.dspace.content.Collection; | ||
| import org.dspace.content.Community; | ||
| import org.dspace.content.DSpaceObject; | ||
| import org.dspace.content.Item; | ||
| import org.dspace.content.factory.ContentServiceFactory; | ||
| import org.dspace.content.service.ItemService; | ||
| import org.dspace.core.Constants; | ||
| import org.dspace.core.Context; | ||
| import org.dspace.xoai.app.BasicConfiguration; | ||
| import org.dspace.xoai.app.XOAI; | ||
| import org.springframework.context.annotation.AnnotationConfigApplicationContext; | ||
|
|
||
| /** | ||
| * The OAIIndexEventConsumer determines which items need to be indexed or updated based on the event type and subject. | ||
| * It listens for changes to items, collections, communities, | ||
| * bundles, and bitstreams, and updates the OAI index accordingly. | ||
| * The indexing is done using the XOAI indexer after all relevant items are collected. | ||
| * | ||
| * Class is copied from UFAL/CLARIN-DSPACE (https://github.com/ufal/clarin-dspace) and modified by | ||
| * @author Michaela Paurikova (dspace at dataquest.sk) | ||
| */ | ||
milanmajchrak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| public class OAIIndexEventConsumer implements Consumer { | ||
milanmajchrak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| /** | ||
| * log4j logger | ||
| */ | ||
| private static final Logger log = Logger.getLogger(OAIIndexEventConsumer.class); | ||
|
|
||
| ItemService itemService = ContentServiceFactory.getInstance().getItemService(); | ||
|
|
||
| // Items collected while consuming events; they are reindexed when the event batch ends. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment is slightly confusing. Either change it to (2) With (2) you'd avoid the need of few casting below in the code |
||
| private Set<Item> itemsToUpdate = null; | ||
|
|
||
| @Override | ||
| public void initialize() throws Exception { | ||
| // Nothing to prepare here. | ||
| } | ||
|
|
||
| /** | ||
| * Consume a content event -- just build the sets of objects to add (new) to | ||
| * the index, update, and delete. | ||
| * | ||
| * @param ctx DSpace context | ||
| * @param event Content event | ||
| */ | ||
| public void consume(Context ctx, Event event) throws Exception { | ||
|
|
||
| if (Objects.isNull(itemsToUpdate)) { | ||
| itemsToUpdate = new HashSet<Item>(); | ||
| } | ||
|
|
||
| int st = event.getSubjectType(); | ||
| if (!(st == Constants.ITEM || st == Constants.BUNDLE | ||
milanmajchrak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| || st == Constants.COLLECTION || st == Constants.COMMUNITY || st == Constants.BITSTREAM)) { | ||
| log | ||
| .warn("IndexConsumer should not have been given this kind of Subject in an event, skipping: " | ||
| + event.toString()); | ||
| return; | ||
| } | ||
|
|
||
| DSpaceObject subject = event.getSubject(ctx); | ||
| DSpaceObject object = event.getObject(ctx); | ||
|
|
||
milanmajchrak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| int et = event.getEventType(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd move this line below - to the place, where "et" number is actually needed. |
||
|
|
||
| if (Objects.nonNull(object) && event.getObjectType() == Constants.ITEM) { | ||
| // Just update the object. | ||
| itemsToUpdate.add((Item)object); | ||
| return; | ||
| } | ||
|
|
||
| if (Objects.isNull(subject)) { | ||
| return; | ||
| } | ||
|
|
||
| if (event.getSubjectType() == Constants.COLLECTION || event.getSubjectType() == Constants.COMMUNITY) { | ||
| if (et == Event.MODIFY || et == Event.MODIFY_METADATA || et == Event.REMOVE || et == Event.DELETE) { | ||
| // Must update all the items. | ||
| if (subject.getType() == Constants.COMMUNITY) { | ||
| for (Collection col : ((Community)subject).getCollections()) { | ||
| addAll(ctx, col); | ||
| } | ||
| } else { | ||
| addAll(ctx, (Collection)subject); | ||
| } | ||
| } | ||
| } else if (event.getSubjectType() == Constants.BITSTREAM || event.getSubjectType() == Constants.BUNDLE) { | ||
| // Must update owning items regardless the event. | ||
| if (subject.getType() == Constants.BITSTREAM) { | ||
| for (Bundle bun : ((Bitstream)subject).getBundles()) { | ||
| itemsToUpdate.addAll(bun.getItems()); | ||
| } | ||
| } else { | ||
| itemsToUpdate.addAll(((Bundle)subject).getItems()); | ||
| } | ||
| } else if (event.getSubjectType() == Constants.ITEM) { | ||
| // Any event reindex this item. | ||
| itemsToUpdate.add((Item)subject); | ||
| } | ||
milanmajchrak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| /** | ||
| * Adds every item returned by the item service for the given collection to the pending set. | ||
| * | ||
| * @param context DSpace context | ||
| * @param col collection whose items are added | ||
| * @throws SQLException passed on from the item lookup | ||
| */ | ||
| private void addAll(Context context, Collection col) throws SQLException { | ||
| for (Iterator<Item> items = itemService.findByCollection(context, col); items.hasNext(); ) { | ||
| itemsToUpdate.add(items.next()); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Process sets of objects to add, update, and delete in index. Correct for | ||
| * interactions between the sets -- e.g. objects which were deleted do not | ||
| * need to be added or updated, new objects don't also need an update, etc. | ||
| */ | ||
| public void end(Context ctx) throws Exception { | ||
|
|
||
| Context anonymousContext = null; | ||
| try { | ||
| if (Objects.isNull(itemsToUpdate)) { | ||
| return; | ||
| } | ||
|
|
||
| Set<Item> filtered = new HashSet<Item>(itemsToUpdate.size()); | ||
| for (Item item : itemsToUpdate) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could be replaced with one line, I think: |
||
| if (Objects.isNull(item.getHandle())) { | ||
| // Probably submission item, skip. | ||
| continue; | ||
| } | ||
| filtered.add(item); | ||
| } | ||
|
|
||
| // "Free" the resources. | ||
| itemsToUpdate = null; | ||
|
|
||
| anonymousContext = new Context(); | ||
| XOAI indexer = new XOAI(anonymousContext, false, false); | ||
| AnnotationConfigApplicationContext applicationContext = new AnnotationConfigApplicationContext( | ||
| new Class[] { BasicConfiguration.class }); | ||
| applicationContext.getAutowireCapableBeanFactory() | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this necessary because we are in dspace-oai? |
||
| .autowireBean(indexer); | ||
| indexer.indexItems(filtered); | ||
| applicationContext.close(); | ||
| } catch (Exception e) { | ||
| itemsToUpdate = null; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd move (itemsToUpdate = null) to finally block. Similarly, line 149 (itemsToUpdate = null) can be removed |
||
| throw e; | ||
| } finally { | ||
| if (Objects.nonNull(anonymousContext)) { | ||
| anonymousContext.complete(); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Called when the consumer is finished; nothing has to be released here. | ||
| * | ||
| * @param ctx DSpace context (not used) | ||
| */ | ||
| @Override | ||
| public void finish(Context ctx) throws Exception { | ||
| // No-op | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -83,6 +83,11 @@ | |
| public class XOAI { | ||
| private static Logger log = LogManager.getLogger(XOAI.class); | ||
|
|
||
| @Autowired | ||
| private XOAICacheService cacheService; | ||
| @Autowired | ||
| private XOAIItemCacheService itemCacheService; | ||
|
|
||
| // needed because the solr query only returns 10 rows by default | ||
| private final Context context; | ||
| private final boolean verbose; | ||
|
|
@@ -105,6 +110,11 @@ public class XOAI { | |
|
|
||
| private List<XOAIExtensionItemCompilePlugin> extensionPlugins; | ||
|
|
||
| { /* NOTE(review): this initializer block builds a Spring context that is only held in a local variable and is neither used nor closed inside the block - confirm it is needed. */ | ||
| AnnotationConfigApplicationContext applicationContext = | ||
| new AnnotationConfigApplicationContext(BasicConfiguration.class); | ||
| } | ||
|
|
||
| private List<String> getFileFormats(Item item) { | ||
| List<String> formats = new ArrayList<>(); | ||
| try { | ||
|
|
@@ -719,4 +729,33 @@ private static void usage() { | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Delete the item from Solr by the ID of the item | ||
| */ | ||
| private void deleteItemByQuery(Item item) throws SolrServerException, IOException { | ||
| SolrClient solrClient = solrServerResolver.getServer(); | ||
| solrClient.deleteByQuery("item.id:" + item.getID().toString()); | ||
| // Solr keeps changes in memory (transaction log) for performance. | ||
| // Without commit(), those changes aren't written to the actual index files. | ||
| // Queries won't reflect deletions (or any updates) until a commit or auto-commit happens. | ||
| solrClient.commit(); | ||
| } | ||
|
|
||
| public void indexItems(java.util.Collection<Item> items) throws Exception { | ||
| for (Item item : items) { | ||
| try { | ||
| deleteItemByQuery(item); | ||
| solrServerResolver.getServer().add(this.index(item)); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we do better here?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Solr keeps changes in memory (transaction log) for performance. Without commit(), those changes aren't written to the actual index files. Queries won't reflect deletions (or any updates) until a commit or auto-commit happens. The commit is also called in the index method and in another place in the code. |
||
| } catch (IOException | XMLStreamException | SQLException | WritingXmlException | SolrServerException e) { | ||
| // If an exception occurs while indexing the item or adding it to the Solr server, | ||
| // the exception is logged, and no further items will be processed. | ||
| log.error("Cannot reindex the item with ID: " + item.getID() + " because: " + e.getMessage()); | ||
| throw new RuntimeException("Cannot reindex the item with ID: " + item.getID() + " because: " | ||
| + e.getMessage()); | ||
| } | ||
| } | ||
| solrServerResolver.getServer().commit(); | ||
| cacheService.deleteAll(); | ||
| itemCacheService.deleteAll(); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.