Skip to content

Commit e73dd2e

Browse files
tobiasdiez authored and Siedlerchr committed
Improve author parsing (#4931)
Fixes #4864. The author class added an additional space when converting "A O" to the dotted version "A. O.".
1 parent 63634b0 commit e73dd2e

File tree

5 files changed

+36
-41
lines changed

5 files changed

+36
-41
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
8282
- We fixed an issue where a non-existing aux file in a group made it impossible to open the library. [#4735](https://github.com/JabRef/jabref/issues/4735)
8383
- We fixed an issue where some journal names were wrongly marked as abbreviated. [#4115](https://github.com/JabRef/jabref/issues/4115)
8484
- We fixed an issue where the custom file column was sorted incorrectly. https://github.com/JabRef/jabref/issues/3119
85+
- We improved the parsing of author names whose infix is abbreviated without a dot. [#4864](https://github.com/JabRef/jabref/issues/4864)
8586
- We fixed an issue where the entry loses focus when a field is edited and at the same time used for sorting. https://github.com/JabRef/jabref/issues/3373
8687
- We fixed an issue where the menu on Mac OS was not displayed in the usual Mac-specific way. https://github.com/JabRef/jabref/issues/3146
8788
- We improved the integrity check for page numbers. [#4113](https://github.com/JabRef/jabref/issues/4113) and [feature request in the forum](http://discourse.jabref.org/t/pages-field-allow-use-of-en-dash/1199)

src/main/java/org/jabref/model/entry/Author.java

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ public Author(String first, String firstabbr, String von, String last, String jr
5656
}
5757

5858
public static String addDotIfAbbreviation(String name) {
59-
// Avoid arrayindexoutof.... :
6059
if ((name == null) || name.isEmpty()) {
6160
return name;
6261
}
@@ -114,23 +113,26 @@ public static String addDotIfAbbreviation(String name) {
114113
// AA -> A. A.
115114
// Only append ". " if the rest of the 'word' is uppercase
116115
boolean nextWordIsUppercase = true;
116+
char furtherChar = Character.MIN_VALUE;
117117
for (int j = i + 1; j < name.length(); j++) {
118-
char furtherChar = name.charAt(j);
119-
if (Character.isWhitespace(furtherChar) || (furtherChar == '-') || (furtherChar == '~')
120-
|| (furtherChar == '.')) {
118+
furtherChar = name.charAt(j);
119+
if (Character.isWhitespace(furtherChar) || (furtherChar == '-') || (furtherChar == '~') || (furtherChar == '.')) {
121120
// end of word
122121
break;
123122
}
124123

125-
boolean furtherIsUppercaseLetter = Character.isLetter(furtherChar)
126-
&& Character.isUpperCase(furtherChar);
124+
boolean furtherIsUppercaseLetter = Character.isLetter(furtherChar) && Character.isUpperCase(furtherChar);
127125
if (!furtherIsUppercaseLetter) {
128126
nextWordIsUppercase = false;
129127
break;
130128
}
131129
}
132130
if (nextWordIsUppercase) {
133-
sb.append(". ");
131+
if (Character.isWhitespace(furtherChar)) {
132+
sb.append(".");
133+
} else {
134+
sb.append(". ");
135+
}
134136
}
135137
}
136138

src/main/java/org/jabref/model/entry/AuthorListParser.java

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ public class AuthorListParser {
8080
* @return a parsed list of persons
8181
*/
8282
public AuthorList parse(String listOfNames) {
83-
8483
Objects.requireNonNull(listOfNames);
8584

8685
// initialization of parser
@@ -103,7 +102,6 @@ public AuthorList parse(String listOfNames) {
103102
* empty.
104103
*/
105104
private Optional<Author> getAuthor() {
106-
107105
List<Object> tokens = new ArrayList<>(); // initialization
108106
int vonStart = -1;
109107
int lastStart = -1;
@@ -154,7 +152,7 @@ private Optional<Author> getAuthor() {
154152
vonStart = tokens.size() - TOKEN_GROUP_LENGTH;
155153
break;
156154
}
157-
} else if ((lastStart < 0) && tokenCase) {
155+
} else if (tokenCase) {
158156
lastStart = tokens.size() - TOKEN_GROUP_LENGTH;
159157
break;
160158
}
@@ -255,13 +253,10 @@ private Optional<Author> getAuthor() {
255253
}
256254

257255
// Third step: do actual splitting, construct Author object
258-
String firstPart = firstPartStart < 0 ? null : concatTokens(tokens, firstPartStart, firstPartEnd, OFFSET_TOKEN,
259-
false);
260-
String firstAbbr = firstPartStart < 0 ? null : concatTokens(tokens, firstPartStart, firstPartEnd,
261-
OFFSET_TOKEN_ABBR, true);
256+
String firstPart = firstPartStart < 0 ? null : concatTokens(tokens, firstPartStart, firstPartEnd, OFFSET_TOKEN, false);
257+
String firstAbbr = firstPartStart < 0 ? null : concatTokens(tokens, firstPartStart, firstPartEnd, OFFSET_TOKEN_ABBR, true);
262258
String vonPart = vonPartStart < 0 ? null : concatTokens(tokens, vonPartStart, vonPartEnd, OFFSET_TOKEN, false);
263-
String lastPart = lastPartStart < 0 ? null : concatTokens(tokens, lastPartStart, lastPartEnd, OFFSET_TOKEN,
264-
false);
259+
String lastPart = lastPartStart < 0 ? null : concatTokens(tokens, lastPartStart, lastPartEnd, OFFSET_TOKEN, false);
265260
String jrPart = jrPartStart < 0 ? null : concatTokens(tokens, jrPartStart, jrPartEnd, OFFSET_TOKEN, false);
266261

267262
if ((firstPart != null) && (lastPart != null) && lastPart.equals(lastPart.toUpperCase(Locale.ROOT)) && (lastPart.length() < 5)

src/test/java/org/jabref/model/entry/AuthorListParameterTest.java

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.jabref.model.entry;
22

3-
import java.util.Arrays;
43
import java.util.stream.Stream;
54

65
import org.junit.jupiter.params.ParameterizedTest;
@@ -9,32 +8,26 @@
98

109
import static org.junit.jupiter.api.Assertions.assertEquals;
1110

12-
public class AuthorListParameterTest {
11+
class AuthorListParameterTest {
1312

1413
private static Stream<Arguments> data() {
15-
1614
return Stream.of(
17-
Arguments.of("王, 军", authorList(new Author("军", "军.", null, "王", null))),
18-
Arguments.of("Doe, John", authorList(new Author("John", "J.", null, "Doe", null))),
19-
Arguments.of("von Berlichingen zu Hornberg, Johann Gottfried",
20-
authorList(new Author("Johann Gottfried", "J. G.", "von", "Berlichingen zu Hornberg", null))),
21-
//Arguments.of("Robert and Sons, Inc.", authorList(new Author(null, null, null, "Robert and Sons, Inc.", null))),
22-
//Arguments.of("al-Ṣāliḥ, Abdallāh", authorList(new Author("Abdallāh", "A.", null, "al-Ṣāliḥ", null))),
23-
Arguments.of("de la Vallée Poussin, Jean Charles Gabriel",
24-
authorList(new Author("Jean Charles Gabriel", "J. C. G.", "de la", "Vallée Poussin", null))),
25-
Arguments.of("de la Vallée Poussin, J. C. G.",
26-
authorList(new Author("J. C. G.", "J. C. G.", "de la", "Vallée Poussin", null))),
27-
Arguments.of("{K}ent-{B}oswell, E. S.", authorList(new Author("E. S.", "E. S.", null, "{K}ent-{B}oswell", null))));
28-
}
29-
30-
private static AuthorList authorList(Author author) {
31-
return new AuthorList(Arrays.asList(author));
15+
Arguments.of("王, 军", new Author("军", "军.", null, "王", null)),
16+
Arguments.of("Doe, John", new Author("John", "J.", null, "Doe", null)),
17+
Arguments.of("von Berlichingen zu Hornberg, Johann Gottfried", new Author("Johann Gottfried", "J. G.", "von", "Berlichingen zu Hornberg", null)),
18+
//Arguments.of("Robert and Sons, Inc.", new Author(null, null, null, "Robert and Sons, Inc.", null))),
19+
//Arguments.of("al-Ṣāliḥ, Abdallāh", new Author("Abdallāh", "A.", null, "al-Ṣāliḥ", null))),
20+
Arguments.of("de la Vallée Poussin, Jean Charles Gabriel", new Author("Jean Charles Gabriel", "J. C. G.", "de la", "Vallée Poussin", null)),
21+
Arguments.of("de la Vallée Poussin, J. C. G.", new Author("J. C. G.", "J. C. G.", "de la", "Vallée Poussin", null)),
22+
Arguments.of("{K}ent-{B}oswell, E. S.", new Author("E. S.", "E. S.", null, "{K}ent-{B}oswell", null)),
23+
Arguments.of("Uhlenhaut, N Henriette", new Author("N Henriette", "N. H.", null, "Uhlenhaut", null))
24+
);
3225
}
3326

3427
@ParameterizedTest
3528
@MethodSource("data")
36-
void parseCorrectly(String authorsString, AuthorList authorsParsed) {
29+
void parseCorrectly(String authorsString, Author authorsParsed) {
3730
AuthorListParser parser = new AuthorListParser();
38-
assertEquals(authorsParsed, parser.parse(authorsString));
31+
assertEquals(new AuthorList(authorsParsed), parser.parse(authorsString));
3932
}
4033
}

src/test/java/org/jabref/model/entry/AuthorTest.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44

55
import static org.junit.jupiter.api.Assertions.assertEquals;
66

7-
public class AuthorTest {
7+
class AuthorTest {
88

99
@Test
10-
public void addDotIfAbbreviationAddDot() {
10+
void addDotIfAbbreviationAddDot() {
1111
assertEquals("O.", Author.addDotIfAbbreviation("O"));
1212
assertEquals("A. O.", Author.addDotIfAbbreviation("AO"));
1313
assertEquals("A. O.", Author.addDotIfAbbreviation("AO."));
@@ -16,7 +16,12 @@ public void addDotIfAbbreviationAddDot() {
1616
}
1717

1818
@Test
19-
public void addDotIfAbbreviationDoNotAddDot() {
19+
void addDotIfAbbreviationDoesNotAddMultipleSpaces() {
20+
assertEquals("A. O.", Author.addDotIfAbbreviation("A O"));
21+
}
22+
23+
@Test
24+
void addDotIfAbbreviationDoNotAddDot() {
2025
assertEquals("O.", Author.addDotIfAbbreviation("O."));
2126
assertEquals("A. O.", Author.addDotIfAbbreviation("A. O."));
2227
assertEquals("A.-O.", Author.addDotIfAbbreviation("A.-O."));
@@ -32,7 +37,6 @@ public void addDotIfAbbreviationDoNotAddDot() {
3237
assertEquals("{\\'{E}}douard", Author.addDotIfAbbreviation("{\\'{E}}douard"));
3338
assertEquals("J{\\\"o}rg", Author.addDotIfAbbreviation("J{\\\"o}rg"));
3439
assertEquals("Moore, O. and O. Moore", Author.addDotIfAbbreviation("Moore, O. and O. Moore"));
35-
assertEquals("Moore, O. and O. Moore and Moore, O. O.",
36-
Author.addDotIfAbbreviation("Moore, O. and O. Moore and Moore, O. O."));
40+
assertEquals("Moore, O. and O. Moore and Moore, O. O.", Author.addDotIfAbbreviation("Moore, O. and O. Moore and Moore, O. O."));
3741
}
3842
}

0 commit comments

Comments
 (0)