Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions maestro-aws/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ dependencies {
implementation('io.awspring.cloud:spring-cloud-aws-starter-sqs:3.+') {
exclude group: 'ch.qos.logback'
}
implementation('org.redisson:redisson:3.+')
Copy link

Copilot AI Jul 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using a dynamic version (3.+) can lead to unexpected upgrades. Specify an exact version that matches your lockfile for reproducible builds.

Copilot uses AI. Check for mistakes.

implementation project(':maestro-common')
implementation project(':maestro-engine')
Expand Down
6 changes: 6 additions & 0 deletions maestro-aws/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,9 @@ services:
- "${LOCALSTACK_VOLUME_DIR:-./tmp}:/var/lib/localstack"
- "/var/run/docker.sock:/var/run/docker.sock"
- "./localstack:/etc/localstack/init/ready.d"
# Redis service definition for the local docker-compose environment.
redis:
image: redis:latest # Uses the latest redis image
ports:
# host:container mapping of port 6379
- "6379:6379"
volumes:
# bind-mounts ./tmp/redis on the host to /data inside the container
- ./tmp/redis:/data
21 changes: 21 additions & 0 deletions maestro-aws/gradle.lockfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
# This is a Gradle generated file for dependency locking.
# Manual edits can break the build and are not advised.
# This file is expected to be part of source control.
com.esotericsoftware:kryo:5.6.2=compileClasspath
com.esotericsoftware:minlog:1.3.1=compileClasspath
com.esotericsoftware:reflectasm:1.11.9=compileClasspath
com.fasterxml.jackson.core:jackson-annotations:2.19.0=compileClasspath
com.fasterxml.jackson.core:jackson-core:2.19.0=compileClasspath
com.fasterxml.jackson.core:jackson-databind:2.19.0=compileClasspath
com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.19.0=compileClasspath
com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.19.0=compileClasspath
com.fasterxml.jackson:jackson-bom:2.19.0=compileClasspath
io.awspring.cloud:spring-cloud-aws-autoconfigure:3.4.0=compileClasspath
io.awspring.cloud:spring-cloud-aws-core:3.4.0=compileClasspath
Expand All @@ -15,11 +20,27 @@ io.awspring.cloud:spring-cloud-aws-starter:3.4.0=compileClasspath
io.micrometer:context-propagation:1.1.3=compileClasspath
io.micrometer:micrometer-commons:1.15.0=compileClasspath
io.micrometer:micrometer-observation:1.15.0=compileClasspath
io.netty:netty-buffer:4.1.121.Final=compileClasspath
io.netty:netty-codec-dns:4.1.121.Final=compileClasspath
io.netty:netty-codec:4.1.121.Final=compileClasspath
io.netty:netty-common:4.1.121.Final=compileClasspath
io.netty:netty-handler:4.1.121.Final=compileClasspath
io.netty:netty-resolver-dns:4.1.121.Final=compileClasspath
io.netty:netty-resolver:4.1.121.Final=compileClasspath
io.netty:netty-transport-native-unix-common:4.1.121.Final=compileClasspath
io.netty:netty-transport:4.1.121.Final=compileClasspath
io.projectreactor:reactor-core:3.6.2=compileClasspath
io.reactivex.rxjava3:rxjava:3.1.8=compileClasspath
jakarta.annotation:jakarta.annotation-api:2.1.1=compileClasspath
javax.cache:cache-api:1.1.1=compileClasspath
net.bytebuddy:byte-buddy:1.15.3=compileClasspath
org.apache.logging.log4j:log4j-api:2.24.3=compileClasspath
org.apache.logging.log4j:log4j-to-slf4j:2.24.3=compileClasspath
org.jodd:jodd-util:6.3.0=compileClasspath
org.objenesis:objenesis:3.4=compileClasspath
org.projectlombok:lombok:1.18.38=annotationProcessor,compileClasspath
org.reactivestreams:reactive-streams:1.0.4=compileClasspath
org.redisson:redisson:3.50.0=compileClasspath
org.slf4j:jul-to-slf4j:2.0.17=compileClasspath
org.slf4j:slf4j-api:2.0.17=compileClasspath
org.springframework.boot:spring-boot-autoconfigure:3.5.0=compileClasspath
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,16 @@
package com.netflix.maestro.engine.autoconfigure;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.netflix.maestro.engine.concurrency.InstanceStepConcurrencyHandler;
import com.netflix.maestro.engine.concurrency.RedisInstanceStepConcurrencyHandler;
import com.netflix.maestro.engine.listeners.SqsSignalInstanceListener;
import com.netflix.maestro.engine.listeners.SqsSignalTriggerExecutionListener;
import com.netflix.maestro.engine.listeners.SqsSignalTriggerMatchListener;
import com.netflix.maestro.engine.listeners.SqsTimeTriggerExecutionListener;
import com.netflix.maestro.engine.producer.SqsSignalQueueProducer;
import com.netflix.maestro.engine.producer.SqsTimeTriggerProducer;
import com.netflix.maestro.engine.properties.AwsProperties;
import com.netflix.maestro.engine.properties.RedisProperties;
import com.netflix.maestro.engine.properties.SqsProperties;
import com.netflix.maestro.engine.publisher.MaestroNotificationPublisher;
import com.netflix.maestro.engine.publisher.SnsEventNotificationPublisher;
Expand All @@ -36,6 +39,10 @@
import io.awspring.cloud.sqs.operations.SqsTemplate;
import java.time.Duration;
import lombok.extern.slf4j.Slf4j;
import org.redisson.Redisson;
import org.redisson.api.RedissonClient;
import org.redisson.client.codec.StringCodec;
import org.redisson.config.Config;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
Expand Down Expand Up @@ -155,4 +162,48 @@ private AsyncTaskExecutor createDefaultTaskExecutor(SqsProperties props) {
threadPoolTaskExecutor.afterPropertiesSet();
return threadPoolTaskExecutor;
}

/**
 * Exposes the Redis based {@link InstanceStepConcurrencyHandler} bean. Only registered when the
 * property {@code maestro.redis.enabled} is set to {@code true}.
 *
 * @param redisson redisson client used to obtain the Lua script executor
 * @param metricRepo metrics client handed over to the handler
 * @return a {@link RedisInstanceStepConcurrencyHandler} instance
 */
@Bean
@ConditionalOnProperty(value = "maestro.redis.enabled", havingValue = "true")
public InstanceStepConcurrencyHandler redisInstanceStepConcurrencyHandler(
    RedissonClient redisson, MaestroMetrics metricRepo) {
  LOG.info("Creating maestro redisInstanceStepConcurrencyHandler within Spring boot...");
  // The script executor is created with the plain string codec.
  var luaScriptExecutor = redisson.getScript(StringCodec.INSTANCE);
  return new RedisInstanceStepConcurrencyHandler(luaScriptExecutor, metricRepo);
}

/**
 * Creates the {@link RedissonClient} bean from the redis section of the AWS properties. Only
 * registered when the property {@code maestro.redis.enabled} is set to {@code true}; the client's
 * {@code shutdown} method is registered as the bean destroy method.
 *
 * <p>The configured server address is used as is for a single server. For cluster and sentinel
 * server types, it is treated as a comma separated list of addresses.
 *
 * @param props AWS properties holding the redis settings
 * @return a redisson client configured for the selected redis server type
 */
@Bean(destroyMethod = "shutdown")
@ConditionalOnProperty(value = "maestro.redis.enabled", havingValue = "true")
public RedissonClient redisson(AwsProperties props) {
  RedisProperties redisProps = props.getRedis();
  RedisProperties.RedisServerType redisServerType = redisProps.getRedisServerType();

  Config redisConfig = new Config();
  String redisServerAddress = redisProps.getRedisServerAddress();
  // NOTE(review): this value is passed to setTimeout(), which in Redisson looks like the command
  // response timeout rather than the connect timeout (setConnectTimeout) -- confirm the intent.
  int connectionTimeout = redisProps.getRedisConnectionTimeout();
  int scanInterval = redisProps.getRedisScanInterval();

  switch (redisServerType) {
    case CLUSTER:
      redisConfig
          .useClusterServers()
          .setScanInterval(scanInterval) // cluster state scan interval in milliseconds
          .addNodeAddress(splitRedisServerAddresses(redisServerAddress))
          .setTimeout(connectionTimeout);
      break;
    case SENTINEL:
      // Split the address list the same way as the cluster case, so that multiple sentinel
      // nodes are registered individually instead of as one comma-joined (invalid) address.
      // NOTE(review): Redisson sentinel mode presumably also needs a master name
      // (setMasterName); none is set here -- confirm whether RedisProperties should carry one.
      redisConfig
          .useSentinelServers()
          .setScanInterval(scanInterval)
          .addSentinelAddress(splitRedisServerAddresses(redisServerAddress))
          .setTimeout(connectionTimeout);
      break;
    case SINGLE:
    default:
      redisConfig.useSingleServer().setAddress(redisServerAddress).setTimeout(connectionTimeout);
      break;
  }

  return Redisson.create(redisConfig);
}

/** Splits a comma separated address list into individual addresses, trimming whitespace. */
private static String[] splitRedisServerAddresses(String addresses) {
  return addresses.trim().split("\\s*,\\s*");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
/*
* Copyright 2025 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/
package com.netflix.maestro.engine.concurrency;

import com.netflix.maestro.engine.execution.RunRequest;
import com.netflix.maestro.engine.metrics.AwsMetricConstants;
import com.netflix.maestro.exceptions.MaestroRetryableError;
import com.netflix.maestro.metrics.MaestroMetrics;
import com.netflix.maestro.models.Constants;
import com.netflix.maestro.models.Defaults;
import com.netflix.maestro.models.error.Details;
import com.netflix.maestro.utils.ObjectHelper;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.redisson.api.RScript;

/** Redis based instance_step_concurrency handler implementation. */
@Slf4j
@AllArgsConstructor
public class RedisInstanceStepConcurrencyHandler implements InstanceStepConcurrencyHandler {
// Lua script: adds the member ARGV[1] into the set stored at KEYS[1] and returns the SADD result.
private static final String ADD_KEY_INTO_SET_STMT =
"return redis.call('SADD', KEYS[1], ARGV[1]);";
// Lua script: removes the member ARGV[1] from the set stored at KEYS[1] and returns the SREM
// result.
private static final String REMOVE_KEY_FROM_SET_STMT =
"return redis.call('SREM', KEYS[1], ARGV[1]);";
// Lua script: returns 1 if ARGV[1] is already a member of the set at KEYS[1]. Otherwise, returns 0
// if the size of the set at KEYS[2] or at KEYS[1] has reached the limit ARGV[2]. Otherwise, adds
// ARGV[1] into the set at KEYS[1] and returns 1.
private static final String ADD_INSTANCE_INTO_SET_STMT =
"if redis.call('SISMEMBER', KEYS[1], ARGV[1]) == 1 then return 1; end; "
+ "local limit = tonumber(ARGV[2]); "
+ "if redis.call('SCARD', KEYS[2]) >= limit then return 0; end; "
+ "if redis.call('SCARD', KEYS[1]) >= limit then return 0; end; "
+ "redis.call('SADD', KEYS[1], ARGV[1]); return 1;";
// Lua script: deletes all the keys passed in KEYS and returns the DEL result.
private static final String CLEAN_UP_ALL_DATA_STMT = "return redis.call('DEL', unpack(KEYS));";

// Executor used to run the Lua scripts above against redis.
private final RScript redisLua;
// Metrics client used to count the failures of redis operations.
private final MaestroMetrics metrics;

/**
 * Register a step by adding its uuid into the step uuid set (the depth 0 key) of the given
 * concurrencyId.
 *
 * @param concurrencyId concurrency id owning the step uuid set
 * @param uuid step uuid to add
 * @return empty if the redis call succeeds; otherwise, the error details created from the caught
 *     exception
 */
@Override
public Optional<Details> registerStep(String concurrencyId, String uuid) {
  try {
    final String stepSetKey = withHashTag(concurrencyId, 0);
    final List<Object> keys = Collections.singletonList(stepSetKey);
    final Long added =
        redisLua.eval(
            RScript.Mode.READ_WRITE,
            ADD_KEY_INTO_SET_STMT,
            RScript.ReturnType.INTEGER,
            keys,
            uuid);
    LOG.debug("Register a step: [{}][{}] with a result: [{}]", stepSetKey, uuid, added);
  } catch (Exception e) {
    // count the failure first and then report it to the caller as error details
    metrics.counter(
        AwsMetricConstants.INSTANCE_STEP_CONCURRENCY_ERROR_METRIC,
        getClass(),
        AwsMetricConstants.TYPE_TAG,
        "failedRegisterStep");
    final Details failure = Details.create(e, true, "Failed to register a step: " + uuid);
    return Optional.of(failure);
  }
  return Optional.empty();
}

/**
 * Remove a step uuid from the step uuid set (the depth 0 key) of the given concurrencyId.
 *
 * @param concurrencyId concurrency id owning the step uuid set
 * @param uuid step uuid to remove
 * @throws MaestroRetryableError if the redis call fails for any reason
 */
@Override
public void unregisterStep(String concurrencyId, String uuid) {
  try {
    final String stepSetKey = withHashTag(concurrencyId, 0);
    final List<Object> keys = Collections.singletonList(stepSetKey);
    final Long removed =
        redisLua.eval(
            RScript.Mode.READ_WRITE,
            REMOVE_KEY_FROM_SET_STMT,
            RScript.ReturnType.INTEGER,
            keys,
            uuid);
    LOG.debug("Unregister a step: [{}][{}] with a result: [{}]", stepSetKey, uuid, removed);
  } catch (Exception e) {
    // count the failure before surfacing it as a retryable error
    metrics.counter(
        AwsMetricConstants.INSTANCE_STEP_CONCURRENCY_ERROR_METRIC,
        getClass(),
        AwsMetricConstants.TYPE_TAG,
        "failedUnregisterStep");
    throw new MaestroRetryableError(
        e, "Failed to unregister a step [%s] for concurrencyId [%s]", uuid, concurrencyId);
  }
}

/**
 * Build the redis key for a concurrencyId at a given depth, in the form of
 * <code>{concurrencyId}:depth</code>. The concurrencyId is wrapped in curly braces, so all the keys
 * of one concurrencyId share the same braces-enclosed prefix.
 */
private String withHashTag(String concurrencyId, int depth) {
  StringBuilder key = new StringBuilder();
  key.append('{').append(concurrencyId).append("}:").append(depth);
  return key.toString();
}

/**
* Add an instance uuid into the uuid set for a given concurrencyId with depth. It will check if
* either instance uuid set or step uuid set reach the limit. If yes, return false; otherwise
* return true. If there is any exception, also return false. When instance_step_concurrency is
* unset meaning disabled, it will only check the instance uuid set but the step uuid set will not
* be used for throttling (no step is registered so just an always-true check).
*/
@Override
public boolean addInstance(RunRequest runRequest) {
long limit =
ObjectHelper.valueOrDefault(
runRequest.getInstanceStepConcurrency(), Defaults.DEFAULT_INSTANCE_STEP_CONCURRENCY);
int depth = runRequest.getInitiator().getDepth();
String uuid = runRequest.getRequestId().toString();

String idWithHash = withHashTag(runRequest.getCorrelationId(), 0);
String idWithDepth = withHashTag(runRequest.getCorrelationId(), depth);
try {
Boolean res =
redisLua.eval(
RScript.Mode.READ_WRITE,
ADD_INSTANCE_INTO_SET_STMT,
RScript.ReturnType.BOOLEAN,
Arrays.asList(idWithDepth, idWithHash),
uuid,
String.valueOf(limit));
LOG.debug(
"Add an instance: [{}][{}] for limit [{}] with a result: [{}]",
idWithDepth,
uuid,
limit,
res);
return res;
} catch (Exception e) {
LOG.warn(
"Cannot add instance for concurrency id [{}], depth [{}], uuid [{}] due to ",
idWithHash,
Copy link

Copilot AI Jul 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The log in the catch block uses idWithHash (depth=0) instead of the actual depth-specific key or raw concurrencyId. Consider using idWithDepth or concurrencyId for clearer and accurate logging.

Suggested change
idWithHash,
idWithDepth,

Copilot uses AI. Check for mistakes.
depth,
uuid,
e);
metrics.counter(
AwsMetricConstants.INSTANCE_STEP_CONCURRENCY_ERROR_METRIC,
getClass(),
AwsMetricConstants.TYPE_TAG,
"failedAddInstance");
return false;
}
}

/**
 * Remove an instance uuid from the instance uuid set of the given concurrencyId at the given
 * depth.
 *
 * @param concurrencyId concurrency id owning the instance uuid set
 * @param depth depth used to locate the instance uuid set
 * @param uuid instance uuid to remove
 * @throws MaestroRetryableError if the redis call fails for any reason
 */
@Override
public void removeInstance(String concurrencyId, int depth, String uuid) {
  final String instanceSetKey = withHashTag(concurrencyId, depth);
  try {
    final List<Object> keys = Collections.singletonList(instanceSetKey);
    final Long removed =
        redisLua.eval(
            RScript.Mode.READ_WRITE,
            REMOVE_KEY_FROM_SET_STMT,
            RScript.ReturnType.INTEGER,
            keys,
            uuid);
    LOG.debug("Remove an instance: [{}][{}] with a result: [{}]", instanceSetKey, uuid, removed);
  } catch (Exception e) {
    // count the failure before surfacing it as a retryable error
    metrics.counter(
        AwsMetricConstants.INSTANCE_STEP_CONCURRENCY_ERROR_METRIC,
        getClass(),
        AwsMetricConstants.TYPE_TAG,
        "failedRemoveInstance");
    throw new MaestroRetryableError(
        e,
        "Failed to remove instance for concurrency id [%s], depth [%s], uuid [%s]",
        concurrencyId,
        depth,
        uuid);
  }
}

/**
 * Delete all the uuid sets of the given concurrencyId, i.e. the keys for every depth from 0
 * (inclusive) to the workflow depth limit (exclusive), within a single script call.
 *
 * @param concurrencyId concurrency id whose sets are deleted
 * @throws MaestroRetryableError if the redis call fails for any reason
 */
@Override
public void cleanUp(String concurrencyId) {
  final List<Object> allSetKeys =
      IntStream.range(0, Constants.WORKFLOW_DEPTH_LIMIT)
          .<Object>mapToObj(level -> withHashTag(concurrencyId, level))
          .collect(Collectors.toList());
  try {
    final Long deleted =
        redisLua.eval(
            RScript.Mode.READ_WRITE,
            CLEAN_UP_ALL_DATA_STMT,
            RScript.ReturnType.INTEGER,
            allSetKeys);
    LOG.debug("Cleanup all sets: {} with a result: [{}]", allSetKeys, deleted);
  } catch (Exception e) {
    // count the failure before surfacing it as a retryable error
    metrics.counter(
        AwsMetricConstants.INSTANCE_STEP_CONCURRENCY_ERROR_METRIC,
        getClass(),
        AwsMetricConstants.TYPE_TAG,
        "failedCleanUp");
    throw new MaestroRetryableError(e, "Failed to cleanup all sets for [%s]", concurrencyId);
  }
}
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
/*
* Copyright 2025 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/
package com.netflix.maestro.engine.listeners;

import com.fasterxml.jackson.databind.ObjectMapper;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
/*
* Copyright 2025 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/
package com.netflix.maestro.engine.listeners;

import com.fasterxml.jackson.databind.ObjectMapper;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
/*
* Copyright 2025 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/
package com.netflix.maestro.engine.listeners;

import com.fasterxml.jackson.databind.ObjectMapper;
Expand Down
Loading