-
-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Fix exclude consistency fields 13131 #13305
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
1038371
e3c1fe8
b073a0c
8e7b892
6189539
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -8,17 +8,62 @@ | |||||||||||||
import java.util.Map; | ||||||||||||||
import java.util.SequencedCollection; | ||||||||||||||
import java.util.Set; | ||||||||||||||
import java.util.stream.Collectors; | ||||||||||||||
import java.util.stream.Stream; | ||||||||||||||
import java.util.function.BiConsumer; | ||||||||||||||
|
||||||||||||||
|
||||||||||||||
import org.jabref.logic.bibtex.comparator.BibEntryByCitationKeyComparator; | ||||||||||||||
import org.jabref.logic.bibtex.comparator.BibEntryByFieldsComparator; | ||||||||||||||
import org.jabref.logic.bibtex.comparator.FieldComparatorStack; | ||||||||||||||
import org.jabref.model.entry.BibEntry; | ||||||||||||||
import org.jabref.model.entry.field.Field; | ||||||||||||||
import org.jabref.model.entry.field.SpecialField; | ||||||||||||||
import org.jabref.model.entry.field.StandardField; | ||||||||||||||
import org.jabref.model.entry.field.UserSpecificCommentField; | ||||||||||||||
import org.jabref.model.entry.types.EntryType; | ||||||||||||||
|
||||||||||||||
public class BibliographyConsistencyCheck { | ||||||||||||||
|
||||||||||||||
// ---------------------------------------------------------------- | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use proper Java doc, keep comment style |
||||||||||||||
// Fields that we do *not* want to check for consistency: | ||||||||||||||
// – every field in StandardField.AUTOMATIC_FIELDS | ||||||||||||||
// – COMMENT, CROSSREF, CITES, PDF, REVIEW, SORTKEY, SORTNAME, TYPE, XREF, GROUPS | ||||||||||||||
// – plus any SpecialField or UserSpecificCommentField (filtered at runtime) | ||||||||||||||
// ---------------------------------------------------------------- | ||||||||||||||
private static final Set<Field> FILTERED_FIELDS = Stream | ||||||||||||||
// start with JabRef’s built‐in AUTOMATIC_FIELDS set | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove this comment, code is self-explanatory There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The issue with AI comments is that if humans and AI both work in the code, it is not ensured if the comments match the implementation if the code is updated. |
||||||||||||||
.concat( | ||||||||||||||
StandardField.AUTOMATIC_FIELDS.stream(), | ||||||||||||||
// then add the extra fields to ignore | ||||||||||||||
Stream.of( | ||||||||||||||
StandardField.COMMENT, | ||||||||||||||
StandardField.CROSSREF, | ||||||||||||||
StandardField.CITES, | ||||||||||||||
StandardField.PDF, | ||||||||||||||
StandardField.REVIEW, | ||||||||||||||
StandardField.SORTKEY, | ||||||||||||||
StandardField.SORTNAME, | ||||||||||||||
StandardField.TYPE, | ||||||||||||||
StandardField.XREF, | ||||||||||||||
StandardField.GROUPS | ||||||||||||||
) | ||||||||||||||
) | ||||||||||||||
.collect(Collectors.toSet()); | ||||||||||||||
|
||||||||||||||
/** | ||||||||||||||
* Drop any field we don’t want to check. | ||||||||||||||
*/ | ||||||||||||||
private static Set<Field> filterFields(Collection<Field> fields) { | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nitpick: Can you use a better method name, hinting at what we are filtering against? "filterFIelds" is a bit generic. Lets consider filterXFields or removeXFields - think what one word can "X" be, based on the logic? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, we can be more specific here wrt the parameter
Suggested change
|
||||||||||||||
return fields.stream() | ||||||||||||||
.filter(f -> !FILTERED_FIELDS.contains(f)) | ||||||||||||||
.filter(f -> !(f instanceof SpecialField)) | ||||||||||||||
.filter(f -> !(f instanceof UserSpecificCommentField)) | ||||||||||||||
Comment on lines
+59
to
+61
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We avoid abbreviations
Suggested change
|
||||||||||||||
.collect(Collectors.toSet()); | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
|
||||||||||||||
|
||||||||||||||
public record Result(Map<EntryType, EntryTypeResult> entryTypeToResultMap) { | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
|
@@ -69,7 +114,10 @@ public Result check(List<BibEntry> entries, BiConsumer<Integer, Integer> entries | |||||||||||||
|
||||||||||||||
List<BibEntry> differingEntries = entryTypeToEntriesMap | ||||||||||||||
.get(entryType).stream() | ||||||||||||||
.filter(entry -> !entry.getFields().equals(commonFields)) | ||||||||||||||
.filter(entry -> { | ||||||||||||||
Set<Field> entryFiltered = filterFields(entry.getFields()); | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The variable name 'entryFiltered' is not descriptive enough. It should convey the purpose or content of the variable more clearly. |
||||||||||||||
return !entryFiltered.equals(commonFields); | ||||||||||||||
}) | ||||||||||||||
.sorted(comparatorStack) | ||||||||||||||
.toList(); | ||||||||||||||
|
||||||||||||||
|
@@ -80,17 +128,25 @@ public Result check(List<BibEntry> entries, BiConsumer<Integer, Integer> entries | |||||||||||||
} | ||||||||||||||
|
||||||||||||||
private static void collectEntriesIntoMaps(List<BibEntry> entries, Map<EntryType, Set<Field>> entryTypeToFieldsInAnyEntryMap, Map<EntryType, Set<Field>> entryTypeToFieldsInAllEntriesMap, Map<EntryType, Set<BibEntry>> entryTypeToEntriesMap) { | ||||||||||||||
for (BibEntry entry : entries) { | ||||||||||||||
EntryType entryType = entry.getType(); | ||||||||||||||
|
||||||||||||||
Set<Field> fieldsInAnyEntry = entryTypeToFieldsInAnyEntryMap.computeIfAbsent(entryType, _ -> new HashSet<>()); | ||||||||||||||
fieldsInAnyEntry.addAll(entry.getFields()); | ||||||||||||||
|
||||||||||||||
Set<Field> fieldsInAllEntries = entryTypeToFieldsInAllEntriesMap.computeIfAbsent(entryType, _ -> new HashSet<>(entry.getFields())); | ||||||||||||||
fieldsInAllEntries.retainAll(entry.getFields()); | ||||||||||||||
|
||||||||||||||
Set<BibEntry> entriesOfType = entryTypeToEntriesMap.computeIfAbsent(entryType, _ -> new HashSet<>()); | ||||||||||||||
entriesOfType.add(entry); | ||||||||||||||
} | ||||||||||||||
entries.forEach(entry -> { | ||||||||||||||
EntryType entryType = entry.getType(); | ||||||||||||||
|
||||||||||||||
// 1) filter out AUTOMATIC_FIELDS + COMMENT, CROSSREF, CITES, PDF, REVIEW, | ||||||||||||||
// SORTKEY, SORTNAME, TYPE, XREF, GROUPS | ||||||||||||||
// 2) remove any SpecialField or UserSpecificCommentField | ||||||||||||||
Set<Field> filteredFields = filterFields(entry.getFields()); | ||||||||||||||
|
||||||||||||||
Set<Field> fieldsInAnyEntry = entryTypeToFieldsInAnyEntryMap | ||||||||||||||
.computeIfAbsent(entryType, k -> new HashSet<>()); | ||||||||||||||
fieldsInAnyEntry.addAll(filteredFields); | ||||||||||||||
|
||||||||||||||
Set<Field> fieldsInAllEntries = entryTypeToFieldsInAllEntriesMap | ||||||||||||||
.computeIfAbsent(entryType, k -> new HashSet<>(filteredFields)); | ||||||||||||||
fieldsInAllEntries.retainAll(filteredFields); | ||||||||||||||
|
||||||||||||||
Set<BibEntry> entriesOfType = entryTypeToEntriesMap | ||||||||||||||
.computeIfAbsent(entryType, k -> new HashSet<>()); | ||||||||||||||
entriesOfType.add(entry); | ||||||||||||||
}); | ||||||||||||||
} | ||||||||||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,8 +8,10 @@ | |
import org.jabref.logic.importer.ImportFormatPreferences; | ||
import org.jabref.logic.importer.fileformat.BibtexImporter; | ||
import org.jabref.model.entry.BibEntry; | ||
import org.jabref.model.entry.field.SpecialField; | ||
import org.jabref.model.entry.field.StandardField; | ||
import org.jabref.model.entry.field.UnknownField; | ||
import org.jabref.model.entry.field.UserSpecificCommentField; | ||
import org.jabref.model.entry.types.StandardEntryType; | ||
import org.jabref.model.util.DummyFileUpdateMonitor; | ||
|
||
|
@@ -18,6 +20,8 @@ | |
import org.mockito.Answers; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.junit.jupiter.api.Assertions.assertNotNull; | ||
import static org.junit.jupiter.api.Assertions.assertTrue; | ||
import static org.mockito.Mockito.mock; | ||
|
||
class BibliographyConsistencyCheckTest { | ||
|
@@ -102,4 +106,43 @@ void checkLibraryWithoutIssues(@TempDir Path tempDir) { | |
BibliographyConsistencyCheck.Result expected = new BibliographyConsistencyCheck.Result(Map.of()); | ||
assertEquals(expected, result); | ||
} | ||
|
||
/** | ||
* Entries that differ only by automatic, comment, PDF, special or user-specific comment fields | ||
* should produce no consistency results. | ||
*/ | ||
@Test | ||
void filteredFieldsAreIgnored() { | ||
// MISC entries differing only in filtered fields: | ||
BibEntry a = new BibEntry(StandardEntryType.Misc, "a") | ||
.withField(StandardField.COMMENT, "note") | ||
.withField(StandardField.PDF, "file.pdf") | ||
.withField(new UserSpecificCommentField("XYZ"), "foo") | ||
.withField(SpecialField.PRIORITY, "high"); | ||
BibEntry b = new BibEntry(StandardEntryType.Misc, "b") | ||
.withField(StandardField.COMMENT, "another note") | ||
.withField(StandardField.PDF, "other.pdf"); | ||
|
||
var result = new BibliographyConsistencyCheck().check(List.of(a, b)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please don't use |
||
|
||
// Since all differences are in filtered fields, there should be no entries reported: | ||
assertTrue(result.entryTypeToResultMap().isEmpty(), | ||
"Differences only in filtered fields must be ignored"); | ||
Comment on lines
+129
to
+130
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The use of assertTrue to check for an empty map is less informative than using assertEquals to directly compare the map to an empty map, which would provide clearer test failure messages. |
||
} | ||
|
||
@SuppressWarnings("checkstyle:RegexpMultiline") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do not suppress checkstyle warnings without having a good reason to do so. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi subhramit, |
||
@Test | ||
void nonFilteredFieldDifferenceIsReported() { | ||
// Two MISC entries differing in AUTHOR (not filtered) | ||
BibEntry withAuthor = new BibEntry(StandardEntryType.Misc, "1") | ||
.withField(StandardField.AUTHOR, "Knuth"); | ||
BibEntry withoutAuthor = new BibEntry(StandardEntryType.Misc, "2"); | ||
|
||
var result = new BibliographyConsistencyCheck().check(List.of(withAuthor, withoutAuthor)); | ||
|
||
// AUTHOR should be reported as a unique field | ||
var typeResult = result.entryTypeToResultMap().get(StandardEntryType.Misc); | ||
Comment on lines
+141
to
+144
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar comments regarding |
||
assertNotNull(typeResult); | ||
assertTrue(typeResult.fields().contains(StandardField.AUTHOR)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using assertTrue to check if a collection contains an element is less informative than using assertEquals to compare the collection to an expected collection, which would provide clearer test failure messages. |
||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Checkstyle