forked from langchain4j/langchain4j
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
DashScope: Support Wanx Models (for text-generated images) (langchain4j#1710)
## Change Alibaba uses Wanx models to support text-to-image features (not Qwen), and provides services on DashScope. See: https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-wanxiang Integrate them into langchain4j-dashscope as ImageModel. ## General checklist <!-- Please double-check the following points and mark them like this: [X] --> - [X] There are no breaking changes - [X] I have added unit and integration tests for my change - [X] I have manually run all the unit and integration tests in the module I have added/changed, and they are all green - [ ] I have manually run all the unit and integration tests in the [core](https://github.com/langchain4j/langchain4j/tree/main/langchain4j-core) and [main](https://github.com/langchain4j/langchain4j/tree/main/langchain4j) modules, and they are all green
- Loading branch information
1 parent
4ee7b8a
commit c14c86c
Showing
14 changed files
with
409 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
71 changes: 71 additions & 0 deletions
71
langchain4j-dashscope/src/main/java/dev/langchain4j/model/dashscope/WanxHelper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
package dev.langchain4j.model.dashscope; | ||
|
||
import com.alibaba.dashscope.aigc.imagesynthesis.ImageSynthesisOutput; | ||
import com.alibaba.dashscope.aigc.imagesynthesis.ImageSynthesisResult; | ||
import com.alibaba.dashscope.exception.NoApiKeyException; | ||
import com.alibaba.dashscope.utils.OSSUtils; | ||
import dev.langchain4j.data.image.Image; | ||
import dev.langchain4j.internal.Utils; | ||
|
||
import java.io.ByteArrayInputStream; | ||
import java.io.IOException; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.nio.file.Paths; | ||
import java.nio.file.StandardCopyOption; | ||
import java.util.*; | ||
import java.util.stream.Collectors; | ||
|
||
public class WanxHelper { | ||
static List<Image> imagesFrom(ImageSynthesisResult result) { | ||
return Optional.of(result) | ||
.map(ImageSynthesisResult::getOutput) | ||
.map(ImageSynthesisOutput::getResults) | ||
.orElse(Collections.emptyList()) | ||
.stream() | ||
.map(resultMap -> resultMap.get("url")) | ||
.map(url -> Image.builder().url(url).build()) | ||
.collect(Collectors.toList()); | ||
} | ||
|
||
static String imageUrl(Image image, String model, String apiKey) { | ||
String imageUrl; | ||
|
||
if (image.url() != null) { | ||
imageUrl = image.url().toString(); | ||
} else if (Utils.isNotNullOrBlank(image.base64Data())) { | ||
String filePath = saveDataAsTemporaryFile(image.base64Data(), image.mimeType()); | ||
try { | ||
imageUrl = OSSUtils.upload(model, filePath, apiKey); | ||
} catch (NoApiKeyException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} else { | ||
throw new IllegalArgumentException("Failed to get image url from " + image); | ||
} | ||
|
||
return imageUrl; | ||
} | ||
|
||
static String saveDataAsTemporaryFile(String base64Data, String mimeType) { | ||
String tmpDir = System.getProperty("java.io.tmpdir", "/tmp"); | ||
String tmpFileName = UUID.randomUUID().toString(); | ||
if (Utils.isNotNullOrBlank(mimeType)) { | ||
// e.g. "image/png", "image/jpeg"... | ||
int lastSlashIndex = mimeType.lastIndexOf("/"); | ||
if (lastSlashIndex >= 0 && lastSlashIndex < mimeType.length() - 1) { | ||
String fileSuffix = mimeType.substring(lastSlashIndex + 1); | ||
tmpFileName = tmpFileName + "." + fileSuffix; | ||
} | ||
} | ||
|
||
Path tmpFilePath = Paths.get(tmpDir, tmpFileName); | ||
byte[] data = Base64.getDecoder().decode(base64Data); | ||
try { | ||
Files.copy(new ByteArrayInputStream(data), tmpFilePath, StandardCopyOption.REPLACE_EXISTING); | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
return tmpFilePath.toAbsolutePath().toString(); | ||
} | ||
} |
150 changes: 150 additions & 0 deletions
150
langchain4j-dashscope/src/main/java/dev/langchain4j/model/dashscope/WanxImageModel.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
package dev.langchain4j.model.dashscope; | ||
|
||
import com.alibaba.dashscope.aigc.imagesynthesis.ImageSynthesis; | ||
import com.alibaba.dashscope.aigc.imagesynthesis.ImageSynthesisParam; | ||
import com.alibaba.dashscope.aigc.imagesynthesis.ImageSynthesisResult; | ||
import com.alibaba.dashscope.exception.NoApiKeyException; | ||
import dev.langchain4j.data.image.Image; | ||
import dev.langchain4j.internal.Utils; | ||
import dev.langchain4j.model.dashscope.spi.WanxImageModelBuilderFactory; | ||
import dev.langchain4j.model.image.ImageModel; | ||
import dev.langchain4j.model.output.Response; | ||
import lombok.Builder; | ||
|
||
import java.util.List; | ||
|
||
import static dev.langchain4j.model.dashscope.WanxHelper.imageUrl; | ||
import static dev.langchain4j.model.dashscope.WanxHelper.imagesFrom; | ||
import static dev.langchain4j.spi.ServiceHelper.loadFactories; | ||
|
||
/** | ||
* Represents a Wanx models to generate artistic images. | ||
* More details are available <a href="https://help.aliyun.com/zh/dashscope/developer-reference/api-details-9">here</a>. | ||
*/ | ||
public class WanxImageModel implements ImageModel { | ||
private final String apiKey; | ||
private final String modelName; | ||
// The generation method of the reference image. The optional values are | ||
// 'repaint' and 'refonly'; repaint represents the reference content and | ||
// refonly represents the reference style. Default is 'repaint'. | ||
private final WanxImageRefMode refMode; | ||
// The similarity between the expected output result and the reference image, | ||
// the value range is [0.0, 1.0]. The larger the number, the more similar the | ||
// generated result is to the reference image. Default is 0.5. | ||
private final Float refStrength; | ||
private final Integer seed; | ||
// The resolution of the generated image currently only supports '1024*1024', | ||
// '720*1280', and '1280*720' resolutions. Default is '1024*1024'. | ||
private final WanxImageSize size; | ||
private final WanxImageStyle style; | ||
private final ImageSynthesis imageSynthesis; | ||
|
||
@Builder | ||
public WanxImageModel(String baseUrl, | ||
String apiKey, | ||
String modelName, | ||
WanxImageRefMode refMode, | ||
Float refStrength, | ||
Integer seed, | ||
WanxImageSize size, | ||
WanxImageStyle style) { | ||
if (Utils.isNullOrBlank(apiKey)) { | ||
throw new IllegalArgumentException("DashScope api key must be defined. It can be generated here: https://dashscope.console.aliyun.com/apiKey"); | ||
} | ||
this.modelName = Utils.isNullOrBlank(modelName) ? WanxModelName.WANX_V1 : modelName; | ||
this.apiKey = apiKey; | ||
this.refMode = refMode; | ||
this.refStrength = refStrength; | ||
this.seed = seed; | ||
this.size = size; | ||
this.style = style; | ||
this.imageSynthesis = Utils.isNullOrBlank(baseUrl) ? new ImageSynthesis() : new ImageSynthesis("text2image", baseUrl); | ||
} | ||
|
||
@Override | ||
public Response<Image> generate(String prompt) { | ||
ImageSynthesisParam param = requestBuilder(prompt).n(1).build(); | ||
|
||
try { | ||
ImageSynthesisResult result = imageSynthesis.call(param); | ||
return Response.from(imagesFrom(result).get(0)); | ||
} catch (NoApiKeyException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
@Override | ||
public Response<List<Image>> generate(String prompt, int n) { | ||
ImageSynthesisParam param = requestBuilder(prompt).n(n).build(); | ||
|
||
try { | ||
ImageSynthesisResult result = imageSynthesis.call(param); | ||
return Response.from(imagesFrom(result)); | ||
} catch (NoApiKeyException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
@Override | ||
public Response<Image> edit(Image image, String prompt) { | ||
String imageUrl = imageUrl(image, modelName, apiKey); | ||
|
||
ImageSynthesisParam.ImageSynthesisParamBuilder<?, ?> builder = requestBuilder(prompt) | ||
.refImage(imageUrl) | ||
.n(1); | ||
|
||
if (imageUrl.startsWith("oss://")) { | ||
builder.header("X-DashScope-OssResourceResolve", "enable"); | ||
} | ||
|
||
try { | ||
ImageSynthesisResult result = imageSynthesis.call(builder.build()); | ||
return Response.from(imagesFrom(result).get(0)); | ||
} catch (NoApiKeyException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
private ImageSynthesisParam.ImageSynthesisParamBuilder<?, ?> requestBuilder(String prompt) { | ||
ImageSynthesisParam.ImageSynthesisParamBuilder<?, ?> builder = ImageSynthesisParam.builder() | ||
.apiKey(apiKey) | ||
.model(modelName) | ||
.prompt(prompt); | ||
|
||
if (seed != null) { | ||
builder.seed(seed); | ||
} | ||
|
||
if (size != null) { | ||
builder.size(size.toString()); | ||
} | ||
|
||
if (style != null) { | ||
builder.style(style.toString()); | ||
} | ||
|
||
if (refMode != null) { | ||
builder.parameter("ref_mode", refMode.toString()); | ||
} | ||
|
||
if (refStrength != null) { | ||
builder.parameter("ref_strength", refStrength); | ||
} | ||
|
||
return builder; | ||
} | ||
|
||
public static WanxImageModel.WanxImageModelBuilder builder() { | ||
for (WanxImageModelBuilderFactory factory : loadFactories(WanxImageModelBuilderFactory.class)) { | ||
return factory.get(); | ||
} | ||
return new WanxImageModel.WanxImageModelBuilder(); | ||
} | ||
|
||
public static class WanxImageModelBuilder { | ||
public WanxImageModelBuilder() { | ||
// This is public so it can be extended | ||
// By default with Lombok it becomes package private | ||
} | ||
} | ||
} |
17 changes: 17 additions & 0 deletions
17
langchain4j-dashscope/src/main/java/dev/langchain4j/model/dashscope/WanxImageRefMode.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package dev.langchain4j.model.dashscope; | ||
|
||
/**
 * Generation modes for a reference image. {@link #toString()} yields the
 * lowercase identifier associated with each constant.
 */
public enum WanxImageRefMode {
    /** Identifier {@code "repaint"}. */
    REPAINT("repaint"),
    /** Identifier {@code "refonly"}. */
    REFONLY("refonly");

    private final String value;

    WanxImageRefMode(String value) {
        this.value = value;
    }

    /** Returns the identifier string of this mode rather than the constant name. */
    @Override
    public String toString() {
        return this.value;
    }
}
18 changes: 18 additions & 0 deletions
18
langchain4j-dashscope/src/main/java/dev/langchain4j/model/dashscope/WanxImageSize.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
package dev.langchain4j.model.dashscope; | ||
|
||
/**
 * Image resolutions, each mapped to a {@code "width*height"} string that is
 * returned by {@link #toString()}.
 */
public enum WanxImageSize {
    /** {@code "1024*1024"}. */
    SIZE_1024_1024("1024*1024"),
    /** {@code "720*1280"}. */
    SIZE_720_1280("720*1280"),
    /** {@code "1280*720"}. */
    SIZE_1280_720("1280*720");

    private final String value;

    WanxImageSize(String value) {
        this.value = value;
    }

    /** Returns the resolution string of this size rather than the constant name. */
    @Override
    public String toString() {
        return this.value;
    }
}
25 changes: 25 additions & 0 deletions
25
langchain4j-dashscope/src/main/java/dev/langchain4j/model/dashscope/WanxImageStyle.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package dev.langchain4j.model.dashscope; | ||
|
||
/**
 * Image styles, each mapped to an angle-bracketed tag (for example
 * {@code "<anime>"}) that is returned by {@link #toString()}.
 */
public enum WanxImageStyle {
    PHOTOGRAPHY("<photography>"),
    PORTRAIT("<portrait>"),
    CARTOON_3D("<3d cartoon>"),
    ANIME("<anime>"),
    OIL_PAINTING("<oil painting>"),
    WATERCOLOR("<watercolor>"),
    SKETCH("<sketch>"),
    CHINESE_PAINTING("<chinese painting>"),
    FLAT_ILLUSTRATION("<flat illustration>"),
    AUTO("<auto>");

    private final String tag;

    WanxImageStyle(String tag) {
        this.tag = tag;
    }

    /** Returns the style tag of this constant rather than the constant name. */
    @Override
    public String toString() {
        return this.tag;
    }
}
6 changes: 6 additions & 0 deletions
6
langchain4j-dashscope/src/main/java/dev/langchain4j/model/dashscope/WanxModelName.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
package dev.langchain4j.model.dashscope; | ||
|
||
/**
 * Model names for use with WanxImageModel.
 */
public class WanxModelName {
    /** Wanx model for text-generated images, supports Chinese and English. */
    public static final String WANX_V1 = "wanx-v1";
}
8 changes: 8 additions & 0 deletions
8
...hscope/src/main/java/dev/langchain4j/model/dashscope/spi/QwenTokenizerBuilderFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
package dev.langchain4j.model.dashscope.spi; | ||
|
||
import dev.langchain4j.model.dashscope.QwenTokenizer; | ||
|
||
import java.util.function.Supplier; | ||
|
||
public interface QwenTokenizerBuilderFactory extends Supplier<QwenTokenizer.QwenTokenizerBuilder> { | ||
} |
8 changes: 8 additions & 0 deletions
8
...scope/src/main/java/dev/langchain4j/model/dashscope/spi/WanxImageModelBuilderFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
package dev.langchain4j.model.dashscope.spi; | ||
|
||
import dev.langchain4j.model.dashscope.WanxImageModel; | ||
|
||
import java.util.function.Supplier; | ||
|
||
public interface WanxImageModelBuilderFactory extends Supplier<WanxImageModel.WanxImageModelBuilder> { | ||
} |
Oops, something went wrong.