forked from langchain4j/langchain4j
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixed tests that fail when run from an IDE (langchain4j#54)
Also added the HTML document type.
- Loading branch information
1 parent
ba45d54
commit fd83555
Showing
4 changed files
with
58 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
29 changes: 14 additions & 15 deletions
29
langchain4j/src/test/java/dev/langchain4j/data/document/parser/TextDocumentParserTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,38 +1,37 @@ | ||
package dev.langchain4j.data.document.parser; | ||
|
||
import dev.langchain4j.data.document.Document; | ||
import dev.langchain4j.data.document.source.FileSystemSource; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import java.io.IOException; | ||
import java.nio.file.Paths; | ||
import java.io.InputStream; | ||
|
||
import static dev.langchain4j.data.document.DocumentType.TXT; | ||
import static java.nio.charset.StandardCharsets.ISO_8859_1; | ||
import static org.assertj.core.api.Assertions.assertThat; | ||
|
||
class TextDocumentParserTest { | ||
|
||
@Test | ||
// TODO This test fails when running it directly in IDE, but works when running in maven | ||
void should_parse_with_utf8_charset_by_default() throws IOException { | ||
void should_parse_with_utf8_charset_by_default() { | ||
|
||
FileSystemSource source = FileSystemSource.from(Paths.get("src/test/resources/test-file-utf8.txt")); | ||
TextDocumentParser parser = new TextDocumentParser(); | ||
TextDocumentParser parser = new TextDocumentParser(TXT); | ||
InputStream inputStream = getClass().getClassLoader().getResourceAsStream("test-file-utf8.txt"); | ||
|
||
Document document = parser.parse(source.inputStream()); | ||
Document document = parser.parse(inputStream); | ||
|
||
assertThat(document.text()).isEqualTo("test\ncontent"); | ||
assertThat(document.text()).isEqualToIgnoringWhitespace("test content"); | ||
assertThat(document.metadata().get("document_type")).isEqualTo("TXT"); | ||
} | ||
|
||
@Test | ||
// TODO This test fails when running it directly in IDE, but works when running in maven | ||
void should_parse_with_specified_charset() throws IOException { | ||
void should_parse_with_specified_charset() { | ||
|
||
FileSystemSource source = FileSystemSource.from(Paths.get("src/test/resources/test-file-iso-8859-1.txt")); | ||
TextDocumentParser parser = new TextDocumentParser(ISO_8859_1); | ||
TextDocumentParser parser = new TextDocumentParser(TXT, ISO_8859_1); | ||
InputStream inputStream = getClass().getClassLoader().getResourceAsStream("test-file-iso-8859-1.txt"); | ||
|
||
Document document = parser.parse(source.inputStream()); | ||
Document document = parser.parse(inputStream); | ||
|
||
assertThat(document.text()).isEqualTo("test\ncontent"); | ||
assertThat(document.text()).isEqualToIgnoringWhitespace("test content"); | ||
assertThat(document.metadata().get("document_type")).isEqualTo("TXT"); | ||
} | ||
} |