Skip to content

Commit fae3896

Browse files
authored
Fixing performance issues when bulk deleting (#12926)
* Fixing performance issues when bulk deleting * Removing comments * Update CHANGELOG.md * Delete benchmark Rmoved a benchmark that I was using to test the performance that was accidently pushed. * Fixing JMH Config * Checkstyle Fix
1 parent f5116b1 commit fae3896

File tree

5 files changed

+95
-20
lines changed

5 files changed

+95
-20
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
108108
- We fixed an issue where downloading PDFs from URLs to empty entries resulted in meaningless filenames like "-.pdf". [#12917](https://github.com/JabRef/jabref/issues/12917)
109109
- We fixed an issue where pasting a PDF URL into the main table caused an import error instead of creating a new entry. [#12911](https://github.com/JabRef/jabref/pull/12911)
110110
- We fixed an issue where libraries would sometimes be hidden when closing tabs with the Welcome tab open. [#12894](https://github.com/JabRef/jabref/issues/12894)
111+
- We fixed an issue with deleting entries in large libraries that caused it to take a long time. [#8976](https://github.com/JabRef/jabref/issues/8976)
111112

112113
### Removed
113114

build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,6 +1004,7 @@ jmh {
10041004
warmupIterations = 5
10051005
iterations = 10
10061006
fork = 2
1007+
zip64 = true
10071008
}
10081009

10091010
requirementTracing {

src/jmh/java/org/jabref/benchmarks/Benchmarks.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
import java.io.IOException;
44
import java.io.StringReader;
55
import java.io.StringWriter;
6+
import java.util.Collections;
67
import java.util.List;
78
import java.util.Random;
89

910
import org.jabref.logic.bibtex.FieldPreferences;
1011
import org.jabref.logic.citationkeypattern.CitationKeyPatternPreferences;
12+
import org.jabref.logic.exporter.BibDatabaseWriter;
1113
import org.jabref.logic.exporter.BibWriter;
1214
import org.jabref.logic.exporter.BibtexDatabaseWriter;
1315
import org.jabref.logic.exporter.SelfContainedSaveConfiguration;
@@ -31,8 +33,10 @@
3133
import org.jabref.model.groups.KeywordGroup;
3234
import org.jabref.model.groups.WordKeywordGroup;
3335
import org.jabref.model.metadata.MetaData;
36+
import org.jabref.model.metadata.SaveOrder;
3437

3538
import com.airhacks.afterburner.injection.Injector;
39+
import org.mockito.Answers;
3640
import org.openjdk.jmh.Main;
3741
import org.openjdk.jmh.annotations.Benchmark;
3842
import org.openjdk.jmh.annotations.Scope;
@@ -77,11 +81,15 @@ public void init() throws Exception {
7781
private StringWriter getOutputWriter() throws IOException {
7882
StringWriter outputWriter = new StringWriter();
7983
BibWriter bibWriter = new BibWriter(outputWriter, OS.NEWLINE);
84+
SelfContainedSaveConfiguration saveConfiguration = new SelfContainedSaveConfiguration(SaveOrder.getDefaultSaveOrder(), false, BibDatabaseWriter.SaveType.WITH_JABREF_META_DATA, false);
85+
FieldPreferences fieldPreferences = new FieldPreferences(true, Collections.emptyList(), Collections.emptyList());
86+
CitationKeyPatternPreferences citationKeyPatternPreferences = mock(CitationKeyPatternPreferences.class, Answers.RETURNS_DEEP_STUBS);
87+
8088
BibtexDatabaseWriter databaseWriter = new BibtexDatabaseWriter(
8189
bibWriter,
82-
mock(SelfContainedSaveConfiguration.class),
83-
mock(FieldPreferences.class),
84-
mock(CitationKeyPatternPreferences.class),
90+
saveConfiguration,
91+
fieldPreferences,
92+
citationKeyPatternPreferences,
8593
new BibEntryTypesManager());
8694
databaseWriter.savePartOfDatabase(new BibDatabaseContext(database, new MetaData()), database.getEntries());
8795
return outputWriter;

src/main/java/org/jabref/model/database/BibDatabase.java

Lines changed: 76 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import java.util.Set;
1717
import java.util.TreeSet;
1818
import java.util.concurrent.ConcurrentHashMap;
19+
import java.util.function.Consumer;
1920
import java.util.regex.Pattern;
2021
import java.util.stream.Collectors;
2122

@@ -28,11 +29,13 @@
2829
import org.jabref.model.entry.BibEntry;
2930
import org.jabref.model.entry.BibtexString;
3031
import org.jabref.model.entry.Month;
32+
import org.jabref.model.entry.ParsedEntryLink;
3133
import org.jabref.model.entry.event.EntriesEventSource;
3234
import org.jabref.model.entry.event.EntryChangedEvent;
3335
import org.jabref.model.entry.event.FieldChangedEvent;
3436
import org.jabref.model.entry.field.Field;
3537
import org.jabref.model.entry.field.FieldFactory;
38+
import org.jabref.model.entry.field.FieldProperty;
3639
import org.jabref.model.entry.field.StandardField;
3740
import org.jabref.model.strings.StringUtil;
3841

@@ -61,6 +64,9 @@ public class BibDatabase {
6164
// Not included in equals, because it is not relevant for the content of the database
6265
private final EventBus eventBus = new EventBus();
6366

67+
// Reverse index for citation links
68+
private final Map<String, Set<BibEntry>> citationIndex = new ConcurrentHashMap<>();
69+
6470
private String preamble;
6571

6672
// All file contents below the last entry in the file
@@ -192,7 +198,11 @@ public synchronized void insertEntries(List<BibEntry> newEntries, EntriesEventSo
192198
eventBus.post(new EntriesAddedEvent(newEntries, newEntries.getFirst(), eventSource));
193199
}
194200
entries.addAll(newEntries);
195-
newEntries.forEach(entry -> entriesId.put(entry.getId(), entry));
201+
newEntries.forEach(entry -> {
202+
entriesId.put(entry.getId(), entry);
203+
indexEntry(entry);
204+
}
205+
);
196206
}
197207

198208
public synchronized void removeEntry(BibEntry bibEntry) {
@@ -223,17 +233,72 @@ public synchronized void removeEntries(List<BibEntry> toBeDeleted) {
223233
public synchronized void removeEntries(List<BibEntry> toBeDeleted, EntriesEventSource eventSource) {
224234
Objects.requireNonNull(toBeDeleted);
225235

226-
List<String> ids = new ArrayList<>();
236+
Collection idsToBeDeleted;
237+
if (toBeDeleted.size() > 10) {
238+
idsToBeDeleted = new HashSet<>();
239+
} else {
240+
idsToBeDeleted = new ArrayList<>(toBeDeleted.size());
241+
}
242+
227243
for (BibEntry entry : toBeDeleted) {
228-
ids.add(entry.getId());
244+
idsToBeDeleted.add(entry.getId());
229245
}
230-
boolean anyRemoved = entries.removeIf(entry -> ids.contains(entry.getId()));
231-
if (anyRemoved) {
232-
toBeDeleted.forEach(entry -> entriesId.remove(entry.getId()));
233-
eventBus.post(new EntriesRemovedEvent(toBeDeleted, eventSource));
246+
247+
List<BibEntry> newEntries = new ArrayList<>(entries);
248+
newEntries.removeIf(entry -> idsToBeDeleted.contains(entry.getId()));
249+
250+
toBeDeleted.forEach(entry -> {
251+
entriesId.remove(entry.getId());
252+
removeEntryFromIndex(entry);
253+
});
254+
255+
entries.setAll(newEntries);
256+
eventBus.post(new EntriesRemovedEvent(toBeDeleted, eventSource));
257+
}
258+
259+
private void forEachCitationKey(BibEntry entry, Consumer<String> keyConsumer) {
260+
for (Field field : entry.getFields()) {
261+
if (field.getProperties().contains(FieldProperty.SINGLE_ENTRY_LINK) || field.getProperties().contains(FieldProperty.MULTIPLE_ENTRY_LINK)) {
262+
List<ParsedEntryLink> parsedLinks = entry.getEntryLinkList(field, this);
263+
264+
for (ParsedEntryLink link : parsedLinks) {
265+
String key = link.getKey().trim();
266+
if (!key.isEmpty()) {
267+
keyConsumer.accept(key);
268+
}
269+
}
270+
}
234271
}
235272
}
236273

274+
public Set<BibEntry> getEntriesForCitationKey(String citationKey) {
275+
return citationIndex.getOrDefault(citationKey, Collections.emptySet());
276+
}
277+
278+
private Set<String> getReferencedCitationKeys(BibEntry entry) {
279+
Set<String> keys = new HashSet<>();
280+
forEachCitationKey(entry, key -> keys.add(key));
281+
return keys;
282+
}
283+
284+
private void indexEntry(BibEntry entry) {
285+
forEachCitationKey(entry, key ->
286+
citationIndex.computeIfAbsent(key, k -> ConcurrentHashMap.newKeySet()).add(entry)
287+
);
288+
}
289+
290+
private void removeEntryFromIndex(BibEntry entry) {
291+
forEachCitationKey(entry, key -> {
292+
Set<BibEntry> entriesForKey = citationIndex.get(key);
293+
if (entriesForKey != null) {
294+
entriesForKey.remove(entry);
295+
if (entriesForKey.isEmpty()) {
296+
citationIndex.remove(key);
297+
}
298+
}
299+
});
300+
}
301+
237302
/**
238303
* Returns the database's preamble.
239304
* If the preamble text consists only of whitespace, then also an empty optional is returned.
@@ -626,13 +691,11 @@ public String getNewLineSeparator() {
626691

627692
/**
628693
* @return The index of the given entry in the list of entries, or -1 if the entry is not in the list.
629-
*
630694
* @implNote New entries are always added to the end of the list and always get a higher ID.
631-
* See {@link org.jabref.model.entry.BibEntry#BibEntry(org.jabref.model.entry.types.EntryType) BibEntry},
632-
* {@link org.jabref.model.entry.IdGenerator IdGenerator},
633-
* {@link BibDatabase#insertEntries(List, EntriesEventSource) insertEntries}.
634-
* Therefore, using binary search to find the index.
635-
*
695+
* See {@link org.jabref.model.entry.BibEntry#BibEntry(org.jabref.model.entry.types.EntryType) BibEntry},
696+
* {@link org.jabref.model.entry.IdGenerator IdGenerator},
697+
* {@link BibDatabase#insertEntries(List, EntriesEventSource) insertEntries}.
698+
* Therefore, using binary search to find the index.
636699
* @implNote IDs are zero-padded strings, so there is no need to convert them to integers for comparison.
637700
*/
638701
public int indexOf(BibEntry bibEntry) {

src/main/java/org/jabref/model/database/KeyChangeListener.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import java.util.Arrays;
55
import java.util.List;
66
import java.util.Optional;
7+
import java.util.Set;
78

89
import org.jabref.model.database.event.EntriesRemovedEvent;
910
import org.jabref.model.entry.BibEntry;
@@ -41,17 +42,18 @@ public void listen(EntriesRemovedEvent event) {
4142
}
4243

4344
private void updateEntryLinks(String newKey, String oldKey) {
44-
for (BibEntry entry : database.getEntries()) {
45+
Set<BibEntry> affectedEntries = database.getEntriesForCitationKey(oldKey);
46+
for (BibEntry entry : affectedEntries) {
4547
entry.getFields(field -> field.getProperties().contains(FieldProperty.SINGLE_ENTRY_LINK))
4648
.forEach(field -> {
4749
String fieldContent = entry.getField(field).orElseThrow();
4850
replaceSingleKeyInField(newKey, oldKey, entry, field, fieldContent);
4951
});
5052
entry.getFields(field -> field.getProperties().contains(FieldProperty.MULTIPLE_ENTRY_LINK))
5153
.forEach(field -> {
52-
String fieldContent = entry.getField(field).orElseThrow();
53-
replaceKeyInMultiplesKeyField(newKey, oldKey, entry, field, fieldContent);
54-
});
54+
String fieldContent = entry.getField(field).orElseThrow();
55+
replaceKeyInMultiplesKeyField(newKey, oldKey, entry, field, fieldContent);
56+
});
5557
}
5658
}
5759

0 commit comments

Comments
 (0)