Skip to content

Commit

Permalink
fix(Google AI Gemini) — Fixed wrong mapping between function executio…
Browse files Browse the repository at this point in the history
…n requests and function execution responses. (langchain4j#1802)

Added a test using `AiServices`.
Function execution requests were mapped to function execution responses.
  • Loading branch information
glaforge authored Sep 23, 2024
1 parent a91ea8a commit 6bd851f
Show file tree
Hide file tree
Showing 10 changed files with 511 additions and 4 deletions.
83 changes: 83 additions & 0 deletions docs/docs/integrations/document-loaders/google-cloud-storage.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
---
sidebar_position: 2
---

# Google Cloud Storage

A Google Cloud Storage (GCS) document loader that allows you to load documents from storage buckets.

## Maven Dependency

```xml
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-document-loader-google-cloud-storage</artifactId>
<version>0.34.0</version>
</dependency>
```

## APIs

- `GoogleCloudStorageDocumentLoader`

## Authentication

Authentication is handled transparently for you in the following cases:
* Your application is running on Google Cloud Platform (Cloud Run, App Engine, Compute Engine, etc.)
* You are running locally on your machine and are already authenticated via Google's `gcloud` SDK

In that case, create a loader by specifying only your project ID:

```java
GoogleCloudStorageDocumentLoader gcsLoader = GoogleCloudStorageDocumentLoader.builder()
.project(System.getenv("GCP_PROJECT_ID"))
.build();
```

Otherwise, if you have downloaded a service account key and exported an environment variable pointing to it, you can pass the `Credentials` explicitly:

```java
GoogleCloudStorageDocumentLoader gcsLoader = GoogleCloudStorageDocumentLoader.builder()
.project(System.getenv("GCP_PROJECT_ID"))
.credentials(GoogleCredentials.fromStream(new FileInputStream(System.getenv("GOOGLE_APPLICATION_CREDENTIALS"))))
.build();
```

Learn more about [credentials](https://cloud.google.com/docs/authentication/application-default-credentials).

When accessing a public bucket, you shouldn't need to authenticate.

## Examples

### Load a single file from a GCS bucket

```java
GoogleCloudStorageDocumentLoader gcsLoader = GoogleCloudStorageDocumentLoader.builder()
.project(System.getenv("GCP_PROJECT_ID"))
.build();

Document document = gcsLoader.loadDocument("BUCKET_NAME", "FILE_NAME.txt", new TextDocumentParser());
```

### Load all files from a GCS bucket

```java
GoogleCloudStorageDocumentLoader gcsLoader = GoogleCloudStorageDocumentLoader.builder()
.project(System.getenv("GCP_PROJECT_ID"))
.build();

List<Document> documents = gcsLoader.loadDocuments("BUCKET_NAME", new TextDocumentParser());
```

### Load all files from a GCS bucket with a glob pattern

```java
GoogleCloudStorageDocumentLoader gcsLoader = GoogleCloudStorageDocumentLoader.builder()
.project(System.getenv("GCP_PROJECT_ID"))
.build();

List<Document> documents = gcsLoader.loadDocuments("BUCKET_NAME", "*.txt", new TextDocumentParser());
```

For more code samples, please have a look at the integration test class:
- [GoogleCloudStorageDocumentLoaderIT](https://github.com/langchain4j/langchain4j/blob/main/document-loaders/langchain4j-document-loader-google-cloud-storage/src/test/java/dev/langchain4j/data/document/loader/gcs/GoogleCloudStorageDocumentLoaderIT.java)
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-parent</artifactId>
<version>0.35.0-SNAPSHOT</version>
<relativePath>../../langchain4j-parent/pom.xml</relativePath>
</parent>

<artifactId>langchain4j-document-loader-google-cloud-storage</artifactId>
<name>LangChain4j :: Document loader :: Google Cloud Storage</name>

<dependencies>

<!-- Google Cloud Storage -->

<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-storage</artifactId>
</dependency>

<!-- LangChain4j -->

<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-core</artifactId>
</dependency>

<!-- Test dependencies -->

<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.22.0</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>

</dependencies>

<dependencyManagement>
<dependencies>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>libraries-bom</artifactId>
<scope>import</scope>
<type>pom</type>
<version>26.46.0</version>
</dependency>
</dependencies>
</dependencyManagement>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
<version>3.5.0</version>
<executions>
<execution>
<id>enforce</id>
<configuration>
<rules>
<dependencyConvergence/>
</rules>
</configuration>
<goals>
<goal>enforce</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package dev.langchain4j.data.document.loader.gcs;

import com.google.auth.Credentials;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.StorageOptions;
import com.google.api.gax.paging.Page;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentLoader;
import dev.langchain4j.data.document.DocumentParser;
import dev.langchain4j.data.document.source.GcsSource;

import java.util.ArrayList;
import java.util.List;

import static dev.langchain4j.internal.ValidationUtils.ensureNotBlank;
import static dev.langchain4j.internal.ValidationUtils.ensureNotNull;

/**
 * Google Cloud Storage (GCS) document loader: reads objects from GCS buckets and turns them
 * into {@link Document}s with the help of a {@link DocumentParser}.
 *
 * <p>Instances are created through {@link #builder()}. Both the project ID and the credentials
 * are optional; whatever is not set explicitly is left to the defaults of {@link StorageOptions}.
 */
public class GoogleCloudStorageDocumentLoader {

    private final Storage storage;

    /**
     * Creates the loader and the underlying {@link Storage} client.
     *
     * @param project     the project ID to use, or {@code null} to keep the {@link StorageOptions}
     *                    default; when provided it is checked with {@code ensureNotBlank}
     * @param credentials the credentials to use, or {@code null} to keep the
     *                    {@link StorageOptions} default
     */
    private GoogleCloudStorageDocumentLoader(String project, Credentials credentials) {
        StorageOptions.Builder storageBuilder = StorageOptions.newBuilder();

        if (project != null) {
            storageBuilder.setProjectId(ensureNotBlank(project, "project"));
        }

        if (credentials != null) {
            storageBuilder.setCredentials(credentials);
        }

        this.storage = storageBuilder.build().getService();
    }

    /**
     * Loads a single document from the specified Google Cloud Storage bucket based on the
     * specified object key.
     *
     * @param bucket     GCS bucket to load from.
     * @param objectName The key of the GCS object which should be loaded.
     * @param parser     The parser to be used for parsing text from the object;
     *                   checked with {@code ensureNotNull} before the bucket is contacted.
     * @return A document containing the content of the GCS object.
     * @throws IllegalArgumentException if no object with that name exists in the bucket.
     */
    public Document loadDocument(String bucket, String objectName, DocumentParser parser) {
        // Reject a missing parser before making any remote call.
        ensureNotNull(parser, "parser");

        Blob blob = storage.get(bucket, objectName);
        if (blob == null) {
            throw new IllegalArgumentException("Object gs://" + bucket + "/" + objectName + " couldn't be found.");
        }

        return DocumentLoader.load(new GcsSource(blob), parser);
    }

    /**
     * Loads a list of documents from the specified bucket, filtered with a glob pattern.
     *
     * <p>The listing is requested with {@code BlobListOption.currentDirectory()}. Entries that
     * represent a directory rather than an object are skipped.
     *
     * @param bucket      the bucket to load files from
     * @param globPattern filter only files matching the glob pattern, or {@code null} for no
     *                    filtering, see
     *                    https://cloud.google.com/storage/docs/json_api/v1/objects/list#list-object-glob
     * @param parser      the parser to use to parse the documents; checked with
     *                    {@code ensureNotNull} before the bucket is listed
     * @return A list of documents from the bucket that match the glob pattern.
     */
    public List<Document> loadDocuments(String bucket, String globPattern, DocumentParser parser) {
        // Validate once, up front: previously this only happened per blob, so a null parser
        // went unnoticed whenever the listing happened to be empty.
        ensureNotNull(parser, "parser");

        Page<Blob> blobs = globPattern != null ?
                storage.list(bucket, Storage.BlobListOption.currentDirectory(), Storage.BlobListOption.matchGlob(globPattern)) :
                storage.list(bucket, Storage.BlobListOption.currentDirectory());

        List<Document> documents = new ArrayList<>();

        for (Blob blob : blobs.iterateAll()) {
            // A currentDirectory() listing can contain directory placeholders; they are not
            // objects with content, so there is nothing to parse for them.
            if (blob.isDirectory()) {
                continue;
            }
            documents.add(DocumentLoader.load(new GcsSource(blob), parser));
        }

        return documents;
    }

    /**
     * Loads all documents from a GCS bucket.
     *
     * <p>Equivalent to {@link #loadDocuments(String, String, DocumentParser)} with a
     * {@code null} glob pattern.
     *
     * @param bucket the bucket to load from.
     * @param parser The parser to be used for parsing text from the objects.
     * @return A list of documents.
     */
    public List<Document> loadDocuments(String bucket, DocumentParser parser) {
        return loadDocuments(bucket, null, parser);
    }

    /**
     * Creates a new builder for {@link GoogleCloudStorageDocumentLoader}.
     *
     * @return a fresh builder with no project and no credentials set
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Builder for {@link GoogleCloudStorageDocumentLoader}. Every setting is optional.
     */
    public static class Builder {
        private String project;
        private Credentials credentials;

        /**
         * Sets the project ID.
         *
         * @param project the project ID
         * @return this builder
         */
        public Builder project(String project) {
            this.project = project;
            return this;
        }

        /**
         * Sets the credentials used to authenticate against Google Cloud Storage.
         *
         * @param credentials the credentials
         * @return this builder
         */
        public Builder credentials(Credentials credentials) {
            this.credentials = credentials;
            return this;
        }

        /**
         * Builds the loader from the values collected so far.
         *
         * @return a new {@link GoogleCloudStorageDocumentLoader}
         */
        public GoogleCloudStorageDocumentLoader build() {
            return new GoogleCloudStorageDocumentLoader(project, credentials);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package dev.langchain4j.data.document.source;

import com.google.cloud.ReadChannel;
import com.google.cloud.storage.Blob;
import dev.langchain4j.data.document.DocumentSource;
import dev.langchain4j.data.document.Metadata;

import java.io.IOException;
import java.io.InputStream;
import java.nio.channels.Channels;

/**
 * A {@link DocumentSource} backed by a Google Cloud Storage object ({@link Blob}).
 *
 * <p>The metadata is captured once, at construction time. The content stream is opened
 * on demand by {@link #inputStream()}.
 */
public class GcsSource implements DocumentSource {

    private final Blob blob;
    private final Metadata metadata;

    /**
     * Creates a source for the given blob.
     *
     * @param blob the GCS object to read from
     */
    public GcsSource(Blob blob) {
        this.metadata = getMetadataForBlob(blob);
        this.blob = blob;
    }

    /**
     * Opens a new stream over the content of the blob.
     *
     * <p>A fresh read channel is opened on every call, so a caller never receives a stream
     * that an earlier caller has already consumed or closed. The caller is responsible for
     * closing the returned stream.
     *
     * @return a new input stream reading the blob's content
     */
    @Override
    public InputStream inputStream() throws IOException {
        return Channels.newInputStream(blob.reader());
    }

    /**
     * Returns the metadata collected from the blob when this source was created.
     *
     * @return the blob metadata
     */
    @Override
    public Metadata metadata() {
        return metadata;
    }

    /**
     * Collects the blob attributes exposed as document metadata: {@code source} (the
     * {@code gs://} URI), {@code bucket}, {@code name}, {@code contentType}, {@code size},
     * {@code createTime} and {@code updateTime}. Attributes the blob does not provide are
     * left out instead of being dereferenced while {@code null}.
     */
    private static Metadata getMetadataForBlob(Blob blob) {
        Metadata metadata = new Metadata();
        metadata.put("source", "gs://" + blob.getBucket() + "/" + blob.getName());
        metadata.put("bucket", blob.getBucket());
        metadata.put("name", blob.getName());
        putIfPresent(metadata, "contentType", blob.getContentType());

        Long size = blob.getSize();
        if (size != null) {
            metadata.put("size", size);
        }

        putIfPresent(metadata, "createTime", blob.getCreateTimeOffsetDateTime());
        putIfPresent(metadata, "updateTime", blob.getUpdateTimeOffsetDateTime());
        return metadata;
    }

    /** Stores {@code value.toString()} under {@code key}, unless {@code value} is {@code null}. */
    private static void putIfPresent(Metadata metadata, String key, Object value) {
        if (value != null) {
            metadata.put(key, value.toString());
        }
    }
}
Loading

0 comments on commit 6bd851f

Please sign in to comment.