-
Notifications
You must be signed in to change notification settings - Fork 625
Add imagen editing options like inpainting and outpainting #7075
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
dec0990
9795fc3
60baab6
806d4fb
66cff7b
e710498
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,9 @@ import com.google.firebase.ai.common.util.fullModelName | |
import com.google.firebase.ai.common.util.trimmedModelName | ||
import com.google.firebase.ai.type.Content | ||
import com.google.firebase.ai.type.GenerationConfig | ||
import com.google.firebase.ai.type.ImagenEditingConfig | ||
import com.google.firebase.ai.type.ImagenImageFormat | ||
import com.google.firebase.ai.type.ImagenInlineImage | ||
import com.google.firebase.ai.type.PublicPreviewAPI | ||
import com.google.firebase.ai.type.SafetySetting | ||
import com.google.firebase.ai.type.Tool | ||
|
@@ -75,11 +77,17 @@ internal data class CountTokensRequest( | |
} | ||
|
||
@Serializable | ||
@PublicPreviewAPI | ||
davidmotson marked this conversation as resolved.
Show resolved
Hide resolved
|
||
internal data class GenerateImageRequest( | ||
val instances: List<ImagenPrompt>, | ||
val parameters: ImagenParameters, | ||
) : Request { | ||
@Serializable internal data class ImagenPrompt(val prompt: String) | ||
@Serializable | ||
internal data class ImagenPrompt( | ||
val prompt: String? = null, | ||
val image: ImagenInlineImage.Internal? = null, | ||
davidmotson marked this conversation as resolved.
Show resolved
Hide resolved
|
||
val referenceImages: List<ReferenceImage>? = null | ||
) | ||
|
||
@OptIn(PublicPreviewAPI::class) | ||
@Serializable | ||
|
@@ -93,5 +101,38 @@ internal data class GenerateImageRequest( | |
val personGeneration: String?, | ||
val addWatermark: Boolean?, | ||
val imageOutputOptions: ImagenImageFormat.Internal?, | ||
val editMode: String?, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. REFERENCE COMMENT ONLY: Value comes from: https://cloud.google.com/vertex-ai/generative-ai/docs/image/edit-remove-objects?hl=en#rest_1 It's not documented in https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/imagen-api-customization nor https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/imagen-api-edit There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't this be |
||
val editConfig: ImagenEditingConfig.Internal?, | ||
) | ||
|
||
@Serializable | ||
internal enum class ReferenceType { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't some/all of these values be available to the dev for some features to work? Like https://cloud.google.com/vertex-ai/generative-ai/docs/image/style-customization There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, but right now the rest of the scaffolding isn't there to enable these features. As I add them, I'll also add access to these values. |
||
@SerialName("REFERENCE_TYPE_UNSPECIFIED") UNSPECIFIED, | ||
@SerialName("REFERENCE_TYPE_RAW") RAW, | ||
@SerialName("REFERENCE_TYPE_MASK") MASK, | ||
@SerialName("REFERENCE_TYPE_CONTROL") CONTROL, | ||
@SerialName("REFERENCE_TYPE_STYLE") STYLE, | ||
@SerialName("REFERENCE_TYPE_SUBJECT") SUBJECT, | ||
@SerialName("REFERENCE_TYPE_MASKED_SUBJECT") MASKED_SUBJECT, | ||
@SerialName("REFERENCE_TYPE_PRODUCT") PRODUCT | ||
} | ||
|
||
/**
 * Mask mode values used by [MaskImageConfig].
 *
 * Each constant is serialized under the `MASK_MODE_*` name given in its `@SerialName`
 * annotation rather than under its Kotlin identifier.
 */
@Serializable | ||
internal enum class MaskMode { | ||
@SerialName("MASK_MODE_DEFAULT") DEFAULT, | ||
@SerialName("MASK_MODE_USER_PROVIDED") USER_PROVIDED, | ||
@SerialName("MASK_MODE_BACKGROUND") BACKGROUND, | ||
@SerialName("MASK_MODE_FOREGROUND") FOREGROUND, | ||
@SerialName("MASK_MODE_SEMANTIC") SEMANTIC | ||
} | ||
|
||
/**
 * Serializable mask settings: the [MaskMode] to apply plus a nullable `dilation` value.
 *
 * `dilation` has no default, so callers must pass it explicitly (possibly as `null`).
 */
@Serializable internal data class MaskImageConfig(val maskMode: MaskMode, val dilation: Double?) | ||
|
||
/**
 * Serializable reference-image entry; this is the element type of `ImagenPrompt.referenceImages`.
 *
 * Holds the [ReferenceType], an integer id, the inline image payload, and a nullable
 * [MaskImageConfig]. `maskImageConfig` has no default, so callers must pass it explicitly
 * (possibly as `null`).
 */
@Serializable | ||
internal data class ReferenceImage( | ||
val referenceType: ReferenceType, | ||
val referenceId: Int, | ||
val referenceImage: ImagenInlineImage.Internal, | ||
val maskImageConfig: MaskImageConfig? | ||
) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
package com.google.firebase.ai.type | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please always include the copyright header to make sure the copyright check passes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I have it in the documentation PR instead |
||
|
||
/**
 * Identifies an Imagen edit operation by the string held in [value].
 *
 * The constructor is private, so the only instances are the constants declared in the
 * companion object.
 */
// NOTE(review): presumably `value` is what ends up in the request's `editMode` field — confirm
// against the call site, which is not part of this diff.
public class ImagenEditMode private constructor(internal val value: String) { | ||
|
||
public companion object { | ||
public val INPAINT_INSERTION: ImagenEditMode = ImagenEditMode("EDIT_MODE_INPAINT_INSERTION") | ||
public val INPAINT_REMOVAL: ImagenEditMode = ImagenEditMode("EDIT_MODE_INPAINT_REMOVAL") | ||
public val OUTPAINT: ImagenEditMode = ImagenEditMode("EDIT_MODE_OUTPAINT") | ||
} | ||
} |
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,68 @@ | ||||||||||||||||||||||||
package com.google.firebase.ai.type | ||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (I'm not sure what the "same" here is referring to) |
||||||||||||||||||||||||
|
||||||||||||||||||||||||
import kotlinx.serialization.Serializable | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
@PublicPreviewAPI | ||||||||||||||||||||||||
public class ImagenEditingConfig( | ||||||||||||||||||||||||
internal val image: ImagenInlineImage, | ||||||||||||||||||||||||
internal val editMode: ImagenEditMode, | ||||||||||||||||||||||||
internal val mask: ImagenInlineImage? = null, | ||||||||||||||||||||||||
internal val maskDilation: Double? = null, | ||||||||||||||||||||||||
internal val editSteps: Int? = null, | ||||||||||||||||||||||||
) { | ||||||||||||||||||||||||
public companion object { | ||||||||||||||||||||||||
public fun builder(): Builder = Builder() | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
public class Builder { | ||||||||||||||||||||||||
@JvmField public var image: ImagenInlineImage? = null | ||||||||||||||||||||||||
@JvmField public var editMode: ImagenEditMode? = null | ||||||||||||||||||||||||
@JvmField public var mask: ImagenInlineImage? = null | ||||||||||||||||||||||||
@JvmField public var maskDilation: Double? = null | ||||||||||||||||||||||||
@JvmField public var editSteps: Int? = null | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
public fun setImage(image: ImagenInlineImage): Builder = apply { this.image = image } | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
public fun setEditMode(editMode: ImagenEditMode): Builder = apply { this.editMode = editMode } | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
public fun setMask(mask: ImagenInlineImage): Builder = apply { this.mask = mask } | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
public fun setMaskDilation(maskDilation: Double): Builder = apply { | ||||||||||||||||||||||||
this.maskDilation = maskDilation | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
public fun setEditSteps(editSteps: Int): Builder = apply { this.editSteps = editSteps } | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
/**
 * Builds an [ImagenEditingConfig] from the values currently set on this builder.
 *
 * `mask`, `maskDilation` and `editSteps` are optional and are passed through as-is,
 * including when they are `null`.
 *
 * @return a new [ImagenEditingConfig] populated from this builder.
 * @throws IllegalStateException if `image` or `editMode` has not been set.
 */
public fun build(): ImagenEditingConfig {
  // `image` and `editMode` are public mutable fields, so each is read exactly once here.
  // A separate null check followed by `!!` could observe a different value if the field were
  // reassigned in between, surfacing as a NullPointerException instead of the intended
  // IllegalStateException.
  val requiredImage = checkNotNull(image) { "ImagenEditingConfig must contain an image" }
  val requiredEditMode = checkNotNull(editMode) { "ImagenEditingConfig must contain an editMode" }
  return ImagenEditingConfig(
    image = requiredImage,
    editMode = requiredEditMode,
    mask = mask,
    maskDilation = maskDilation,
    editSteps = editSteps,
  )
}
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
internal fun toInternal(): Internal { | ||||||||||||||||||||||||
return Internal(baseSteps = editSteps) | ||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could the difference in name cause issues with error messages returned by the server? See shortn/_sPJ9lr7YkR There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, it could, but maybe users could figure it out? Or we could include it in the documentation? I can rename it if you think that's the best solution, but I'd prefer a more descriptive name. |
||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
@Serializable | ||||||||||||||||||||||||
internal data class Internal( | ||||||||||||||||||||||||
val baseSteps: Int?, | ||||||||||||||||||||||||
) | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
@PublicPreviewAPI | ||||||||||||||||||||||||
public fun imagenEditingConfig(init: ImagenEditingConfig.Builder.() -> Unit): ImagenEditingConfig { | ||||||||||||||||||||||||
val builder = ImagenEditingConfig.builder() | ||||||||||||||||||||||||
builder.init() | ||||||||||||||||||||||||
return builder.build() | ||||||||||||||||||||||||
Comment on lines
+64
to
+67
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider adding a default value for
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a bad idea: the server has default values that should be preferred over these; in particular, 0 edit steps would likely cause issues. |
||||||||||||||||||||||||
} |
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -18,6 +18,9 @@ package com.google.firebase.ai.type | |||
|
||||
import android.graphics.Bitmap | ||||
import android.graphics.BitmapFactory | ||||
import android.util.Base64 | ||||
import java.io.ByteArrayOutputStream | ||||
import kotlinx.serialization.Serializable | ||||
|
||||
/** | ||||
* Represents an Imagen-generated image that is returned as inline data. | ||||
|
@@ -36,4 +39,19 @@ internal constructor(public val data: ByteArray, public val mimeType: String) { | |||
public fun asBitmap(): Bitmap { | ||||
return BitmapFactory.decodeByteArray(data, 0, data.size) | ||||
} | ||||
|
||||
@Serializable internal data class Internal(val bytesBase64Encoded: String) | ||||
|
||||
/**
 * Converts this image to its serializable [Internal] form.
 *
 * The raw `data` bytes are Base64-encoded with [Base64.NO_WRAP] and stored in
 * `bytesBase64Encoded`.
 */
internal fun toInternal(): Internal =
  Internal(bytesBase64Encoded = Base64.encodeToString(data, Base64.NO_WRAP))
} | ||||
|
||||
@PublicPreviewAPI | ||||
public fun Bitmap.toImagenInlineImage(): ImagenInlineImage { | ||||
val byteArrayOutputStream = ByteArrayOutputStream() | ||||
this.compress(Bitmap.CompressFormat.PNG, 100, byteArrayOutputStream) | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously we decided to go with JPEG for these conversions, see firebase-android-sdk/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Part.kt Line 50 in e4955d9
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For image editing like this, I figured the lossless format would make a better default. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can replace it with the 80% jpeg, or we could change it to PNG across the board. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's go with JPEG for consistency. That being said, it's absolutely true that we need to provide better interfaces to allow devs to pass the raw bytes (and/or file descriptors) for better compatibility |
||||
val byteArray = byteArrayOutputStream.toByteArray() | ||||
return ImagenInlineImage(data = byteArray, mimeType = "image/png") | ||||
} |
Uh oh!
There was an error while loading. Please reload this page.