Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ It was born from the need to create an enterprise QA application.
- [Retrieval](#retrieval)
- [Retrieve Documents from Elasticsearch Index](#retrieve-documents-from-elasticsearch-index)
- [Retrieve Documents from Lucene Directory](#retrieve-documents-from-lucene-directory)
- [Retrieve Documents from RDBMS](#retrieve-documents-from-rdbms)
- [Writer](#writer)
- [Write Documents to Elasticsearch Index](#write-documents-to-elasticsearch-index)
- [Write Documents to Lucene Directory](#write-documents-to-lucene-directory)
Expand Down Expand Up @@ -154,6 +155,24 @@ RetrievalChain retrievalChain = new LuceneRetrievalChain(directory, 2 /* max cou
Stream<Map<String, String>> retrievedDocuments = retrievalChain.run("my question?");
```


##### Retrieve Documents from RDBMS
See [JdbcRetrievalChainIT](src/test/java/com/github/hakenadu/javalangchains/chains/data/retrieval/JdbcRetrievalChainIT.java)

```java
Supplier<Connection> connectionSupplier = () -> {
try {
return DriverManager.getConnection(connectionString, username, password);
} catch (SQLException e) {
throw new IllegalStateException("error creating database connection", e);
}
};

RetrievalChain retrievalChain = new JdbcRetrievalChain(connectionSupplier, 2 /* max count of retrieved documents */);

Stream<Map<String, String>> retrievedDocuments = retrievalChain.run("my question?");
```

#### Writer

##### Write Documents to Elasticsearch Index
Expand Down
6 changes: 6 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -266,5 +266,11 @@
<version>${junit.jupiter.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>42.6.0</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.github.hakenadu.javalangchains.chains.base;

import java.util.NoSuchElementException;
import java.util.stream.Stream;

import com.github.hakenadu.javalangchains.chains.Chain;
Expand All @@ -13,6 +14,6 @@ public final class StreamUnwrappingChain<T> implements Chain<Stream<T>, T> {

/**
 * Unwraps the passed stream by returning one of its elements.
 *
 * @param input the stream to unwrap
 * @return an element of the stream, as selected by {@link Stream#findAny()}
 * @throws NoSuchElementException if the stream is empty
 */
@Override
public T run(final Stream<T> input) {
// diff view: line before the change (no-arg Optional#orElseThrow())
return input.findAny().orElseThrow();
// diff view: line after the change, the exception supplier is passed explicitly
return input.findAny().orElseThrow(NoSuchElementException::new);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package com.github.hakenadu.javalangchains.chains.data.retrieval;

import com.github.hakenadu.javalangchains.util.PromptConstants;
import org.apache.commons.lang3.tuple.Pair;

import javax.swing.text.Document;
import java.sql.*;
import java.util.*;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Stream;

/**
 * {@link RetrievalChain} that retrieves documents from a relational database
 * using JDBC.
 * <p>
 * For each invocation a {@link Connection} is requested from the configured
 * supplier, the SQL statement provided by the query builder is executed as a
 * {@link PreparedStatement} and every row of the result is transformed into a
 * document map. The chain never closes the supplied {@link Connection}; its
 * lifecycle is owned by whoever provides the supplier.
 */
public class JdbcRetrievalChain extends RetrievalChain {
    /**
     * supplier for lazy connection creation on chain invocation
     */
    private final Supplier<Connection> connectionSupplier;
    /**
     * this {@link Function} accepts the user's question and provides the
     * corresponding SQL statement to execute together with the parameters to bind
     */
    private final Function<String, Pair<String, List<Object>>> queryBuilder;
    /**
     * transforms a {@link ResultSet} to a document. default implementation in
     * {@link #documentFromResultSet(ResultSet)}
     */
    private final DocumentCreator documentCreator;

    /**
     * Creates an instance of {@link JdbcRetrievalChain}
     *
     * @param connectionSupplier {@link #connectionSupplier}
     * @param queryBuilder       {@link #queryBuilder}
     * @param documentCreator    {@link #documentCreator}
     * @param maxDocumentCount   {@link RetrievalChain#getMaxDocumentCount()}
     * @throws NullPointerException if the supplier, the query builder or the
     *                              document creator is <code>null</code>
     */
    public JdbcRetrievalChain(Supplier<Connection> connectionSupplier, Function<String, Pair<String, List<Object>>> queryBuilder, DocumentCreator documentCreator, int maxDocumentCount) {
        super(maxDocumentCount);
        this.connectionSupplier = Objects.requireNonNull(connectionSupplier, "connectionSupplier");
        this.documentCreator = Objects.requireNonNull(documentCreator, "documentCreator");
        this.queryBuilder = Objects.requireNonNull(queryBuilder, "queryBuilder");
    }

    /**
     * Creates an instance of {@link JdbcRetrievalChain} using {@link #createQuery(String, String, String)}
     * for SQL statement creation.
     *
     * @param connectionSupplier {@link #connectionSupplier}
     * @param table              Name of the document table used for query creation
     * @param contentColumn      Name of the column containing the document content
     * @param maxDocumentCount   {@link RetrievalChain#getMaxDocumentCount()}
     */
    public JdbcRetrievalChain(Supplier<Connection> connectionSupplier, String table, String contentColumn, int maxDocumentCount) {
        this(connectionSupplier, (question) -> createQuery(question, table, contentColumn), JdbcRetrievalChain::documentFromResultSet, maxDocumentCount);
    }

    /**
     * Creates an instance of {@link JdbcRetrievalChain} using {@link #createQuery(String, String, String)}
     * for SQL statement creation with `Documents` as the table and `content` as the content column.
     *
     * @param connectionSupplier {@link #connectionSupplier}
     * @param maxDocumentCount   {@link RetrievalChain#getMaxDocumentCount()}
     */
    public JdbcRetrievalChain(Supplier<Connection> connectionSupplier, int maxDocumentCount) {
        this(connectionSupplier, "Documents", "content", maxDocumentCount);
    }

    /**
     * Executes the query built for the passed question and returns the resulting
     * documents. Each document additionally contains the question itself under
     * the key {@link PromptConstants#QUESTION}.
     *
     * @param input the user's question
     * @return the retrieved documents, at most {@link #getMaxDocumentCount()}
     * @throws IllegalStateException if the statement cannot be created or executed
     */
    @Override
    public Stream<Map<String, String>> run(String input) {
        // build the query first so that a failing query builder does not cost a connection
        final Pair<String, List<Object>> query = queryBuilder.apply(input);
        final String sql = query.getLeft();
        final List<Object> params = query.getRight();

        // not closed here: the supplier may hand out a shared connection
        final Connection connection = connectionSupplier.get();

        try (PreparedStatement statement = connection.prepareStatement(sql)) {
            statement.setMaxRows(getMaxDocumentCount());
            for (int i = 0; i < params.size(); i++) {
                // JDBC parameter indices are 1-based
                statement.setObject(i + 1, params.get(i));
            }

            final List<Map<String, String>> queryResult = new ArrayList<>();
            try (ResultSet resultSet = statement.executeQuery()) {
                while (resultSet.next()) {
                    final Map<String, String> documentMap = documentCreator.create(resultSet);
                    documentMap.put(PromptConstants.QUESTION, input);
                    queryResult.add(documentMap);
                }
            }
            return queryResult.stream();
        } catch (SQLException e) {
            throw new IllegalStateException("error creating / executing database statement", e);
        }
    }

    /**
     * Transforms a {@link ResultSet} entry to a document containing one entry per
     * column (column name to string representation of the value). Columns holding
     * SQL <code>NULL</code> are omitted from the document.
     *
     * @param resultSet JDBC {@link ResultSet}
     * @return transformed document map
     * @throws SQLException if a column cannot be retrieved from the result set
     */
    private static Map<String, String> documentFromResultSet(ResultSet resultSet) throws SQLException {
        final ResultSetMetaData metaData = resultSet.getMetaData();
        final int columnCount = metaData.getColumnCount();

        final Map<String, String> documentMap = new HashMap<>();
        for (int i = 1; i <= columnCount; i++) {
            final Object value = resultSet.getObject(i);
            // getObject returns null for SQL NULL, calling toString on it would fail
            if (value != null) {
                documentMap.put(metaData.getColumnName(i), value.toString());
            }
        }
        return documentMap;
    }

    /**
     * Internal query creator that acts as a default when the user doesn't supply a customized function.
     * Creates a SQL statement using a content likeness query: the question is split
     * on whitespace and every token becomes a <code>%token%</code> pattern, a row
     * is selected if its content matches any of them.
     * <p>
     * The table and column names are inserted into the SQL text as-is, so they
     * must come from trusted configuration. The question tokens are bound as a
     * statement parameter.
     *
     * @param question      Input / question of the user
     * @param table         Name of the table containing the documents
     * @param contentColumn Name of the column containing the document content
     * @return a {@link Pair} of the SQL and parameters to bind
     */
    private static Pair<String, List<Object>> createQuery(final String question, final String table, final String contentColumn) {
        final String query = String.format("SELECT * FROM %s WHERE %s LIKE ANY (?)", table, contentColumn);
        final String[] patterns = Arrays.stream(question.trim().split("\\s+"))
                .filter(token -> !token.isEmpty())
                .map(token -> "%" + token + "%")
                .toArray(String[]::new);
        // the whole array is bound as ONE parameter (the operand of ANY)
        final List<Object> params = Collections.singletonList(patterns);
        return Pair.of(query, params);
    }

    /**
     * Wrapper interface for Lambdas that act as document creators for a JDBC {@link ResultSet}.
     * Advancing the {@link ResultSet} is not necessary as it is done by the {@link JdbcRetrievalChain}.
     * The returned map must be mutable because the chain adds the question to it.
     */
    @FunctionalInterface
    public interface DocumentCreator {
        /**
         * Creates a document from the current row of the passed result set.
         *
         * @param resultSet result set positioned on the row to transform
         * @return the document for the current row
         * @throws SQLException if the row cannot be read
         */
        Map<String, String> create(final ResultSet resultSet) throws SQLException;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package com.github.hakenadu.javalangchains.chains.data.retrieval;

import java.util.Arrays;
import java.util.List;

/**
 * Sample documents shared by the retrieval chain tests. All three texts are
 * letters about "John Doe" signed by "Jane Doe".
 */
public class DocumentTestUtil {
    /**
     * closing line shared by all sample documents
     */
    private static final String SIGNATURE = "Jane Doe";

    // @formatter:off
    /**
     * biography letter
     */
    public static final String DOCUMENT_1 =
        "Subject: John Doe's Biography\n" +
        "Dear Reader,\n" +
        "I am delighted to present to you the biography of John Doe, a remarkable individual who has left an indelible mark on society. Born and raised in a small town, John displayed an insatiable curiosity and a thirst for knowledge from a young age. He excelled academically, earning scholarships that allowed him to attend prestigious universities and pursue his passion for scientific research.\n" +
        "Throughout his career, John made groundbreaking discoveries in the field of medicine, revolutionizing treatment options for previously incurable diseases. His relentless dedication and tireless efforts have saved countless lives and earned him numerous accolades, including the Nobel Prize in Medicine.\n" +
        "However, John's impact extends far beyond his professional accomplishments. He is known for his philanthropic endeavors, establishing charitable foundations that provide support and opportunities to underprivileged communities. John's compassion and commitment to social justice have inspired many to follow in his footsteps.\n" +
        "In his personal life, John is a devoted family man. He cherishes the time spent with his loving wife and children, always prioritizing their well-being amidst his demanding schedule. Despite his remarkable success, John remains humble and grounded, never forgetting his roots and always seeking ways to uplift those around him.\n" +
        "In conclusion, John Doe is not only a brilliant scientist and humanitarian but also a role model for future generations. His unwavering determination, kindness, and pursuit of excellence make him a true legend.\n" +
        "Sincerely,\n" +
        SIGNATURE;

    /**
     * art exhibition invitation
     */
    public static final String DOCUMENT_2 =
        "Subject: Invitation to John Doe's Art Exhibition\n" +
        "Dear Art Enthusiast,\n" +
        "We are pleased to invite you to a remarkable art exhibition featuring the mesmerizing works of John Doe. Renowned for his unique style and ability to capture the essence of emotions on canvas, John has curated a collection that will leave you awe-struck.\n" +
        "Drawing inspiration from his diverse life experiences, John's art tells compelling stories and invites viewers to delve into the depths of their imagination. Each stroke of the brush reveals a glimpse into his creative mind, conveying a range of emotions that resonate with the observer.\n" +
        "The exhibition will be held at the prestigious XYZ Art Gallery on [date] at [time]. It promises to be an evening filled with artistic brilliance, where you will have the opportunity to meet John Doe in person and gain insights into his creative process. Light refreshments will be served, providing a delightful ambiance for engaging discussions with fellow art enthusiasts.\n" +
        "Kindly RSVP by [RSVP date] to ensure your attendance at this exclusive event. We look forward to your presence and sharing this unforgettable artistic journey with you.\n" +
        "Yours sincerely,\n" +
        SIGNATURE;

    /**
     * travel memoir announcement
     */
    public static final String DOCUMENT_3 =
        "Subject: John Doe's Travel Memoir - Exploring the Unknown\n" +
        "Dear Adventurers,\n" +
        "Prepare to embark on an extraordinary journey as we delve into the captivating travel memoir of John Doe. Throughout his life, John has traversed the globe, seeking out the hidden gems and immersing himself in diverse cultures. His memoir is a testament to the transformative power of travel and the profound impact it can have on one's perspective.\n" +
        "From the bustling streets of Tokyo to the serene beaches of Bali, John's vivid descriptions transport readers to each destination, allowing them to experience the sights, sounds, and flavors firsthand. With a keen eye for detail and a genuine curiosity for the world, he uncovers the untold stories that lie beneath the surface, providing a fresh and unique perspective.\n" +
        "Through his encounters with locals, John unearths the beauty of human connection and the universal language of kindness. He shares anecdotes that will make you laugh, moments that will leave you in awe, and reflections that will inspire you to embark on your own adventures.\n" +
        "This travel memoir not only serves as a guide to off-the-beaten-path destinations but also as a reminder of the inherent beauty and diversity of our planet. It encourages readers to step out of their comfort zones, embrace new experiences, and celebrate the richness of different cultures.\n" +
        "Whether you are an avid traveler or an armchair explorer, John Doe's memoir is a captivating read that will ignite your wanderlust and leave you yearning for new horizons. Join him on this literary expedition and discover the world through his eyes.\n" +
        "Happy reading,\n" +
        SIGNATURE;

    /**
     * all sample documents in declaration order
     */
    public static final List<String> DOCUMENTS = Arrays.asList(DOCUMENT_1, DOCUMENT_2, DOCUMENT_3);
    // @formatter:on

    private DocumentTestUtil() {
        // constants holder, not meant to be instantiated
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package com.github.hakenadu.javalangchains.chains.data.retrieval;

import com.github.hakenadu.javalangchains.util.PromptConstants;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

import java.sql.*;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Integration test for {@link JdbcRetrievalChain} running against a PostgreSQL
 * server on <code>localhost:5432</code>. The test (re-)creates the database
 * <code>langchain_test</code> and seeds it with the documents of
 * {@link DocumentTestUtil}. It is disabled by default.
 */
@Disabled
public class JdbcRetrievalChainIT {
    private static final String SERVER_URL = "jdbc:postgresql://localhost:5432/";
    private static final String DATABASE = "langchain_test";
    private static final String USERNAME = "postgres";
    private static final String PASSWORD = "admin";

    /**
     * connection to the test database, shared by all tests and closed in {@link #tearDown()}
     */
    private static Connection connection;
    private static Supplier<Connection> connectionSupplier;

    /**
     * Creates a fresh test database, the Documents table and its content.
     *
     * @throws SQLException if the database cannot be prepared
     */
    @BeforeAll
    static void setup() throws SQLException {
        // (re-)create the database using a short-lived connection to the server's default database
        try (Connection adminConnection = DriverManager.getConnection(SERVER_URL, USERNAME, PASSWORD);
                Statement adminStatement = adminConnection.createStatement()) {
            adminStatement.execute("DROP DATABASE IF EXISTS " + DATABASE);
            adminStatement.execute("CREATE DATABASE " + DATABASE);
        }

        // Connection#setCatalog is a no-op in the PostgreSQL driver, so switching
        // to the new database requires a connection opened against it
        connection = DriverManager.getConnection(SERVER_URL + DATABASE, USERNAME, PASSWORD);

        try (Statement createTableStatement = connection.createStatement()) {
            // guards against a Documents table inherited from the template database
            createTableStatement.execute("DROP TABLE IF EXISTS Documents");
            createTableStatement.execute("CREATE TABLE Documents (source VARCHAR PRIMARY KEY, content VARCHAR, additional_attribute INTEGER)");
        }

        try (PreparedStatement seedStatement = connection.prepareStatement("INSERT INTO Documents(source, content, additional_attribute) VALUES (?, ?, 1)")) {
            for (int i = 0; i < DocumentTestUtil.DOCUMENTS.size(); i++) {
                // the list index doubles as the document's source / primary key
                seedStatement.setString(1, Integer.toString(i));
                seedStatement.setString(2, DocumentTestUtil.DOCUMENTS.get(i));
                seedStatement.execute();
            }
        }

        connectionSupplier = () -> connection;
    }

    /**
     * Closes the shared connection to the test database.
     *
     * @throws SQLException if closing the connection fails
     */
    @AfterAll
    static void tearDown() throws SQLException {
        if (connection != null) {
            connection.close();
        }
    }

    @Test
    void testRun() throws SQLException {
        JdbcRetrievalChain jdbcRetrievalChain = new JdbcRetrievalChain(connectionSupplier, 1);

        final List<Map<String, String>> retrievedDocuments = jdbcRetrievalChain.run("who is john doe?")
                .collect(Collectors.toList());
        assertEquals(1, retrievedDocuments.size(), "incorrect number of retrieved documents");

        Map<String, String> document = retrievedDocuments.get(0);
        assertEquals("0", document.get("source"));
        assertEquals("1", document.get("additional_attribute"));
        assertEquals(DocumentTestUtil.DOCUMENT_1, document.get(PromptConstants.CONTENT));
        assertEquals("who is john doe?", document.get(PromptConstants.QUESTION));
    }
}
Loading