forked from langchain4j/langchain4j
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix(Google AI Gemini) — Fixed wrong mapping between function executio…
…n requests and function execution responses. (langchain4j#1802) Added a test using `AiServices`. Function execution requests were mapped to function execution responses.
- Loading branch information
Showing
10 changed files
with
511 additions
and
4 deletions.
There are no files selected for viewing
83 changes: 83 additions & 0 deletions
83
docs/docs/integrations/document-loaders/google-cloud-storage.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
--- | ||
sidebar_position: 2 | ||
--- | ||
|
||
# Google Cloud Storage | ||
|
||
A Google Cloud Storage (GCS) document loader that allows you to load documents from storage buckets. | ||
|
||
## Maven Dependency | ||
|
||
```xml | ||
<dependency> | ||
<groupId>dev.langchain4j</groupId> | ||
<artifactId>langchain4j-document-loader-google-cloud-storage</artifactId> | ||
<version>0.35.0</version> | ||
</dependency> | ||
``` | ||
|
||
## APIs | ||
|
||
- `GoogleCloudStorageDocumentLoader` | ||
|
||
## Authentication | ||
|
||
Authentication is handled transparently for you in the following cases: | ||
* If your application is running on Google Cloud Platform (Cloud Run, App Engine, Compute Engine, etc.) | ||
* When running locally on your machine, if you are already authenticated via Google's `gcloud` SDK | ||
|
||
In these cases, create a loader by specifying only your project ID: | ||
|
||
```java | ||
GoogleCloudStorageDocumentLoader gcsLoader = GoogleCloudStorageDocumentLoader.builder() | ||
.project(System.getenv("GCP_PROJECT_ID")) | ||
.build(); | ||
``` | ||
|
||
Otherwise, if you have downloaded a service account key and exported an environment variable pointing to it, you can specify `Credentials` explicitly: | ||
|
||
```java | ||
GoogleCloudStorageDocumentLoader gcsLoader = GoogleCloudStorageDocumentLoader.builder() | ||
.project(System.getenv("GCP_PROJECT_ID")) | ||
.credentials(GoogleCredentials.fromStream(new FileInputStream(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")))) | ||
.build(); | ||
``` | ||
|
||
Learn more about [credentials](https://cloud.google.com/docs/authentication/application-default-credentials). | ||
|
||
When accessing a public bucket, you shouldn't need to authenticate. | ||
|
||
## Examples | ||
|
||
### Load a single file from a GCS bucket | ||
|
||
```java | ||
GoogleCloudStorageDocumentLoader gcsLoader = GoogleCloudStorageDocumentLoader.builder() | ||
.project(System.getenv("GCP_PROJECT_ID")) | ||
.build(); | ||
|
||
Document document = gcsLoader.loadDocument("BUCKET_NAME", "FILE_NAME.txt", new TextDocumentParser()); | ||
``` | ||
|
||
### Load all files from a GCS bucket | ||
|
||
```java | ||
GoogleCloudStorageDocumentLoader gcsLoader = GoogleCloudStorageDocumentLoader.builder() | ||
.project(System.getenv("GCP_PROJECT_ID")) | ||
.build(); | ||
|
||
List<Document> documents = gcsLoader.loadDocuments("BUCKET_NAME", new TextDocumentParser()); | ||
``` | ||
|
||
### Load all files from a GCS bucket with a glob pattern | ||
|
||
```java | ||
GoogleCloudStorageDocumentLoader gcsLoader = GoogleCloudStorageDocumentLoader.builder() | ||
.project(System.getenv("GCP_PROJECT_ID")) | ||
.build(); | ||
|
||
List<Document> documents = gcsLoader.loadDocuments("BUCKET_NAME", "*.txt", new TextDocumentParser()); | ||
``` | ||
|
||
For more code samples, please have a look at the integration test class: | ||
- [GoogleCloudStorageDocumentLoaderIT](https://github.com/langchain4j/langchain4j/blob/main/document-loaders/langchain4j-document-loader-google-cloud-storage/src/test/java/dev/langchain4j/data/document/loader/gcs/GoogleCloudStorageDocumentLoaderIT.java) |
97 changes: 97 additions & 0 deletions
97
document-loaders/langchain4j-document-loader-google-cloud-storage/pom.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
|
||
<parent> | ||
<groupId>dev.langchain4j</groupId> | ||
<artifactId>langchain4j-parent</artifactId> | ||
<version>0.35.0-SNAPSHOT</version> | ||
<relativePath>../../langchain4j-parent/pom.xml</relativePath> | ||
</parent> | ||
|
||
<artifactId>langchain4j-document-loader-google-cloud-storage</artifactId> | ||
<name>LangChain4j :: Document loader :: Google Cloud Storage</name> | ||
|
||
<dependencies> | ||
|
||
<!-- Google Cloud Storage --> | ||
|
||
<dependency> | ||
<groupId>com.google.cloud</groupId> | ||
<artifactId>google-cloud-storage</artifactId> | ||
</dependency> | ||
|
||
<!-- LangChain4j --> | ||
|
||
<dependency> | ||
<groupId>dev.langchain4j</groupId> | ||
<artifactId>langchain4j-core</artifactId> | ||
</dependency> | ||
|
||
<!-- Test dependencies --> | ||
|
||
<dependency> | ||
<groupId>dev.langchain4j</groupId> | ||
<artifactId>langchain4j</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.apache.logging.log4j</groupId> | ||
<artifactId>log4j-api</artifactId> | ||
<version>2.22.0</version> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.junit.jupiter</groupId> | ||
<artifactId>junit-jupiter-engine</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
<dependency> | ||
<groupId>org.assertj</groupId> | ||
<artifactId>assertj-core</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
|
||
</dependencies> | ||
|
||
<dependencyManagement> | ||
<dependencies> | ||
<dependency> | ||
<groupId>com.google.cloud</groupId> | ||
<artifactId>libraries-bom</artifactId> | ||
<scope>import</scope> | ||
<type>pom</type> | ||
<version>26.46.0</version> | ||
</dependency> | ||
</dependencies> | ||
</dependencyManagement> | ||
|
||
<build> | ||
<plugins> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-enforcer-plugin</artifactId> | ||
<version>3.5.0</version> | ||
<executions> | ||
<execution> | ||
<id>enforce</id> | ||
<configuration> | ||
<rules> | ||
<dependencyConvergence/> | ||
</rules> | ||
</configuration> | ||
<goals> | ||
<goal>enforce</goal> | ||
</goals> | ||
</execution> | ||
</executions> | ||
</plugin> | ||
</plugins> | ||
</build> | ||
|
||
</project> |
115 changes: 115 additions & 0 deletions
115
.../main/java/dev/langchain4j/data/document/loader/gcs/GoogleCloudStorageDocumentLoader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
package dev.langchain4j.data.document.loader.gcs; | ||
|
||
import com.google.auth.Credentials; | ||
import com.google.cloud.storage.Blob; | ||
import com.google.cloud.storage.Storage; | ||
import com.google.cloud.storage.StorageOptions; | ||
import com.google.api.gax.paging.Page; | ||
|
||
import dev.langchain4j.data.document.Document; | ||
import dev.langchain4j.data.document.DocumentLoader; | ||
import dev.langchain4j.data.document.DocumentParser; | ||
import dev.langchain4j.data.document.source.GcsSource; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import static dev.langchain4j.internal.ValidationUtils.ensureNotBlank; | ||
import static dev.langchain4j.internal.ValidationUtils.ensureNotNull; | ||
|
||
/** | ||
* Google Cloud Storage Document Loader to load documents from Google Cloud Storage buckets. | ||
*/ | ||
public class GoogleCloudStorageDocumentLoader { | ||
|
||
private final Storage storage; | ||
|
||
private GoogleCloudStorageDocumentLoader(String project, Credentials credentials) { | ||
StorageOptions.Builder storageBuilder = StorageOptions.newBuilder(); | ||
|
||
if (project != null) { | ||
storageBuilder.setProjectId(ensureNotBlank(project, "project")); | ||
} | ||
|
||
if (credentials != null) { | ||
storageBuilder.setCredentials(credentials); | ||
} | ||
|
||
this.storage = storageBuilder.build().getService(); | ||
} | ||
|
||
/** | ||
* Loads a single document from the specified Google Cloud Storage bucket based on the specified object key. | ||
* | ||
* @param bucket GCS bucket to load from. | ||
* @param objectName The key of the GCS object which should be loaded. | ||
* @param parser The parser to be used for parsing text from the object. | ||
* @return A document containing the content of the GCS object. | ||
*/ | ||
public Document loadDocument(String bucket, String objectName, DocumentParser parser) { | ||
Blob blob = storage.get(bucket, objectName); | ||
if (blob == null) { | ||
throw new IllegalArgumentException("Object gs://" + bucket + "/" + objectName + " couldn't be found."); | ||
} | ||
|
||
GcsSource gcsSource = new GcsSource(blob); | ||
return DocumentLoader.load(gcsSource, ensureNotNull(parser, "parser")); | ||
} | ||
|
||
/** | ||
* Load a list of documents from the specified bucket, filtered with a glob pattern. | ||
* | ||
* @param bucket the bucket to load files from | ||
* @param globPattern filter only files matching the glob pattern, see https://cloud.google.com/storage/docs/json_api/v1/objects/list#list-object-glob | ||
* @param parser the parser to use to parse the document | ||
* @return A list of documents from the bucket that match the glob pattern. | ||
*/ | ||
public List<Document> loadDocuments(String bucket, String globPattern, DocumentParser parser) { | ||
Page<Blob> blobs = globPattern != null ? | ||
storage.list(bucket, Storage.BlobListOption.currentDirectory(), Storage.BlobListOption.matchGlob(globPattern)) : | ||
storage.list(bucket, Storage.BlobListOption.currentDirectory()); | ||
|
||
List<Document> documents = new ArrayList<>(); | ||
|
||
for (Blob blob : blobs.iterateAll()) { | ||
GcsSource gcsSource = new GcsSource(blob); | ||
documents.add(DocumentLoader.load(gcsSource, ensureNotNull(parser, "parser"))); | ||
} | ||
|
||
return documents; | ||
} | ||
|
||
/** | ||
* Loads all documents from an GCS bucket. | ||
* | ||
* @param bucket the bucket to load from. | ||
* @param parser The parser to be used for parsing text from the object. | ||
* @return A list of documents. | ||
*/ | ||
public List<Document> loadDocuments(String bucket, DocumentParser parser) { | ||
return loadDocuments(bucket, null, parser); | ||
} | ||
|
||
public static Builder builder() { | ||
return new Builder(); | ||
} | ||
|
||
public static class Builder { | ||
private String project; | ||
private Credentials credentials; | ||
|
||
public Builder project(String project) { | ||
this.project = project; | ||
return this; | ||
} | ||
|
||
public Builder credentials(Credentials credentials) { | ||
this.credentials = credentials; | ||
return this; | ||
} | ||
|
||
public GoogleCloudStorageDocumentLoader build() { | ||
return new GoogleCloudStorageDocumentLoader(project, credentials); | ||
} | ||
} | ||
} |
43 changes: 43 additions & 0 deletions
43
...er-google-cloud-storage/src/main/java/dev/langchain4j/data/document/source/GcsSource.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
package dev.langchain4j.data.document.source; | ||
|
||
import com.google.cloud.ReadChannel; | ||
import com.google.cloud.storage.Blob; | ||
import dev.langchain4j.data.document.DocumentSource; | ||
import dev.langchain4j.data.document.Metadata; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.nio.channels.Channels; | ||
|
||
public class GcsSource implements DocumentSource { | ||
|
||
private final InputStream inputStream; | ||
private final Metadata metadata; | ||
|
||
public GcsSource(Blob blob) { | ||
this.metadata = getMetadataForBlob(blob); | ||
this.inputStream = Channels.newInputStream(blob.reader()); | ||
} | ||
|
||
@Override | ||
public InputStream inputStream() throws IOException { | ||
return inputStream; | ||
} | ||
|
||
@Override | ||
public Metadata metadata() { | ||
return metadata; | ||
} | ||
|
||
private static Metadata getMetadataForBlob(Blob blob) { | ||
Metadata metadata = new Metadata(); | ||
metadata.put("source", "gs://" + blob.getBucket() + "/" + blob.getName()); | ||
metadata.put("bucket", blob.getBucket()); | ||
metadata.put("name", blob.getName()); | ||
metadata.put("contentType", blob.getContentType()); | ||
metadata.put("size", blob.getSize()); | ||
metadata.put("createTime", blob.getCreateTimeOffsetDateTime().toString()); | ||
metadata.put("updateTime", blob.getUpdateTimeOffsetDateTime().toString()); | ||
return metadata; | ||
} | ||
} |
Oops, something went wrong.