Skip to content

Commit

Permalink
Part 2: Support for cancel_after_timeinterval parameter in search and…
Browse files Browse the repository at this point in the history
… msearch request

This commit adds the handling of the new request level parameter and schedule cancellation task. It
also adds a cluster setting to set a global cancellation timeout for search request which will be
used in absence of request level timeout.

TEST: Added new tests in SearchCancellationIT
Signed-off-by: Sorabh Hamirwasia <sohami.apache@gmail.com>
  • Loading branch information
sohami committed Jul 28, 2021
1 parent 982a599 commit 433f91f
Show file tree
Hide file tree
Showing 7 changed files with 457 additions and 6 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ public TimeValue getCancelAfterTimeInterval() {

@Override
public SearchTask createTask(long id, String type, String action, TaskId parentTaskId, Map<String, String> headers) {
return new SearchTask(id, type, action, this::buildDescription, parentTaskId, headers);
return new SearchTask(id, type, action, this::buildDescription, parentTaskId, headers, true, cancelAfterTimeInterval);
}

public final String buildDescription() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

package org.opensearch.action.search;

import org.opensearch.common.unit.TimeValue;
import org.opensearch.tasks.CancellableTask;
import org.opensearch.tasks.TaskId;

Expand All @@ -48,7 +49,12 @@ public class SearchTask extends CancellableTask {

public SearchTask(long id, String type, String action, Supplier<String> descriptionSupplier,
TaskId parentTaskId, Map<String, String> headers) {
super(id, type, action, null, parentTaskId, headers);
this(id, type, action, descriptionSupplier, parentTaskId, headers, false, null);
}

public SearchTask(long id, String type, String action, Supplier<String> descriptionSupplier,
TaskId parentTaskId, Map<String, String> headers, boolean cancelOnTimeout, TimeValue cancelTimeout) {
super(id, type, action, null, parentTaskId, headers, cancelOnTimeout, cancelTimeout);
this.descriptionSupplier = descriptionSupplier;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.opensearch.action.support.ActionFilters;
import org.opensearch.action.support.HandledTransportAction;
import org.opensearch.action.support.IndicesOptions;
import org.opensearch.action.support.TimeoutTaskCancellationUtility;
import org.opensearch.client.Client;
import org.opensearch.client.OriginSettingClient;
import org.opensearch.client.node.NodeClient;
Expand Down Expand Up @@ -81,6 +82,7 @@
import org.opensearch.search.internal.SearchContext;
import org.opensearch.search.profile.ProfileShardResult;
import org.opensearch.search.profile.SearchProfileShardResults;
import org.opensearch.tasks.CancellableTask;
import org.opensearch.tasks.Task;
import org.opensearch.tasks.TaskId;
import org.opensearch.threadpool.ThreadPool;
Expand Down Expand Up @@ -121,6 +123,19 @@ public class TransportSearchAction extends HandledTransportAction<SearchRequest,
public static final Setting<Long> SHARD_COUNT_LIMIT_SETTING = Setting.longSetting(
"action.search.shard_count.limit", Long.MAX_VALUE, 1L, Property.Dynamic, Property.NodeScope);

// cluster level setting for timeout based search cancellation. If search request level parameter is present then that will take
// precedence over the cluster setting value
public static final String SEARCH_REQUEST_CANCEL_AFTER_TIMEINTERVAL_SETTING_KEY = "search.cancel_after_timeinterval";
public static final Setting<TimeValue> SEARCH_REQUEST_CANCEL_AFTER_TIMEINTERVAL_SETTING =
Setting.timeSetting(SEARCH_REQUEST_CANCEL_AFTER_TIMEINTERVAL_SETTING_KEY, TimeValue.timeValueSeconds(300), SearchService.NO_TIMEOUT,
Setting.Property.Dynamic, Setting.Property.NodeScope);

// cluster level setting to control enabling/disabling the timeout based cancellation. This is enabled by default
public static final String SEARCH_REQUEST_CANCELLATION_ENABLE_SETTING_KEY = "search.timeout.cancellation.enable";
public static final Setting<Boolean> SEARCH_REQUEST_CANCELLATION_ENABLE_SETTING =
Setting.boolSetting(SEARCH_REQUEST_CANCELLATION_ENABLE_SETTING_KEY, true, Setting.Property.Dynamic,
Setting.Property.NodeScope);

private final NodeClient client;
private final ThreadPool threadPool;
private final ClusterService clusterService;
Expand Down Expand Up @@ -239,6 +254,15 @@ long buildTookInMillis() {

@Override
protected void doExecute(Task task, SearchRequest searchRequest, ActionListener<SearchResponse> listener) {
// only if task is of type CancellableTask and support cancellation on timeout, treat this request eligible for timeout based
// cancellation. There may be other top level requests like AsyncSearch which is using SearchRequest internally and has it's own
// cancellation mechanism. For such cases, the SearchRequest when created can override the createTask and provide the necessary
// flag to indicate it and bypass this mechanism
final boolean isTimeoutCancelEnabled = clusterService.getClusterSettings().get(SEARCH_REQUEST_CANCELLATION_ENABLE_SETTING);
if (task instanceof CancellableTask && ((CancellableTask) task).shouldCancelOnTimeout() && isTimeoutCancelEnabled) {
listener = TimeoutTaskCancellationUtility.wrapWithCancellationListener(client, (CancellableTask) task,
clusterService.getClusterSettings().get(SEARCH_REQUEST_CANCEL_AFTER_TIMEINTERVAL_SETTING), listener);
}
executeRequest(task, searchRequest, this::searchAsyncAction, listener);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.action.support;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.opensearch.action.ActionListener;
import org.opensearch.action.admin.cluster.node.tasks.cancel.CancelTasksRequest;
import org.opensearch.client.OriginSettingClient;
import org.opensearch.client.node.NodeClient;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.search.SearchService;
import org.opensearch.tasks.CancellableTask;
import org.opensearch.tasks.TaskId;
import org.opensearch.threadpool.Scheduler;
import org.opensearch.threadpool.ThreadPool;

import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import static org.opensearch.action.admin.cluster.node.tasks.get.GetTaskAction.TASKS_ORIGIN;

public class TimeoutTaskCancellationUtility {

private static final Logger logger = LogManager.getLogger(TimeoutTaskCancellationUtility.class);

/**
* Wraps a listener with a timeout listener {@link TimeoutRunnableListener} to schedule the task cancellation for provided tasks on
* generic thread pool
* @param client - {@link NodeClient}
* @param taskToCancel - task to schedule cancellation for
* @param globalTimeout - global timeout to use for scheduling cancellation task in absence of task level parameter
* @param listener - original listener associated with the task
* @return wrapped listener
*/
public static <Response> ActionListener<Response> wrapWithCancellationListener(NodeClient client, CancellableTask taskToCancel,
TimeValue globalTimeout, ActionListener<Response> listener) {
final TimeValue timeoutInterval = (taskToCancel.getCancellationTimeout() == null) ? globalTimeout
: taskToCancel.getCancellationTimeout();
// Note: If -1 (or no timeout) is set at request level then we will use that value instead of cluster level value. This will help
// to turn off cancellation at request level.
ActionListener<Response> listenerToReturn = listener;
if (timeoutInterval.equals(SearchService.NO_TIMEOUT)) {
return listenerToReturn;
}

try {
final TimeoutRunnableListener<Response> wrappedListener = new TimeoutRunnableListener<>(timeoutInterval, listener, () -> {
final CancelTasksRequest cancelTasksRequest = new CancelTasksRequest();
cancelTasksRequest.setTaskId(new TaskId(client.getLocalNodeId(), taskToCancel.getId()));
cancelTasksRequest.setReason("Cancellation timeout of " + timeoutInterval + " is expired");
// force the origin to execute the cancellation as a system user
new OriginSettingClient(client, TASKS_ORIGIN).admin().cluster()
.cancelTasks(cancelTasksRequest, ActionListener.wrap(r -> logger.debug(
"Scheduled cancel task with timeout: {} for original task: {} is successfully completed", timeoutInterval,
cancelTasksRequest.getTaskId()),
e -> logger.error(new ParameterizedMessage("Scheduled cancel task with timeout: {} for original task: {} is failed",
timeoutInterval, cancelTasksRequest.getTaskId()), e))
);
});
wrappedListener.cancellable = client.threadPool().schedule(wrappedListener, timeoutInterval, ThreadPool.Names.GENERIC);
listenerToReturn = wrappedListener;
} catch (Exception ex) {
// if there is any exception in scheduling the cancellation task then continue without it
logger.warn("Failed to schedule the cancellation task for original task: {}, will continue without it", taskToCancel.getId());
}
return listenerToReturn;
}

/**
* Timeout listener which executes the provided runnable after timeout is expired and if a response/failure is not yet received.
* If either a response/failure is received before timeout then the scheduled task is cancelled and response/failure is sent back to
* the original listener.
*/
private static class TimeoutRunnableListener<Response> implements ActionListener<Response>, Runnable {

private static final Logger logger = LogManager.getLogger(TimeoutRunnableListener.class);

// Runnable to execute after timeout
private final TimeValue timeout;
private final ActionListener<Response> originalListener;
private final Runnable timeoutRunnable;
private final AtomicBoolean executeRunnable = new AtomicBoolean(true);
private volatile Scheduler.ScheduledCancellable cancellable;
private final long creationTime;

TimeoutRunnableListener(TimeValue timeout, ActionListener<Response> listener, Runnable runAfterTimeout) {
this.timeout = timeout;
this.originalListener = listener;
this.timeoutRunnable = runAfterTimeout;
this.creationTime = System.nanoTime();
}

@Override public void onResponse(Response response) {
checkAndCancel();
originalListener.onResponse(response);
}

@Override public void onFailure(Exception e) {
checkAndCancel();
originalListener.onFailure(e);
}

@Override public void run() {
try {
if (executeRunnable.compareAndSet(true, false)) {
timeoutRunnable.run();
} // else do nothing since either response/failure is already sent to client
} catch (Exception ex) {
// ignore the exception
logger.error(new ParameterizedMessage("Ignoring the failure to run the provided runnable after timeout of {} with " +
"exception", timeout), ex);
}
}

private void checkAndCancel() {
if (executeRunnable.compareAndSet(true, false)) {
logger.debug("Aborting the scheduled cancel task after {}",
TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - creationTime));
// timer has not yet expired so cancel it
cancellable.cancel();
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,8 @@ public void apply(Settings value, Settings current, Settings previous) {
SearchService.DEFAULT_ALLOW_PARTIAL_SEARCH_RESULTS,
ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING,
TransportSearchAction.SHARD_COUNT_LIMIT_SETTING,
TransportSearchAction.SEARCH_REQUEST_CANCELLATION_ENABLE_SETTING,
TransportSearchAction.SEARCH_REQUEST_CANCEL_AFTER_TIMEINTERVAL_SETTING,
RemoteClusterService.REMOTE_CLUSTER_SKIP_UNAVAILABLE,
RemoteClusterService.SEARCH_REMOTE_CLUSTER_SKIP_UNAVAILABLE,
SniffConnectionStrategy.REMOTE_CONNECTIONS_PER_CLUSTER,
Expand Down
18 changes: 18 additions & 0 deletions server/src/main/java/org/opensearch/tasks/CancellableTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
package org.opensearch.tasks;

import org.opensearch.common.Nullable;
import org.opensearch.common.unit.TimeValue;

import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
Expand All @@ -44,9 +45,18 @@ public abstract class CancellableTask extends Task {

private volatile String reason;
private final AtomicBoolean cancelled = new AtomicBoolean(false);
private final boolean cancelOnTimeout;
private final TimeValue cancellationTimeout;

public CancellableTask(long id, String type, String action, String description, TaskId parentTaskId, Map<String, String> headers) {
this(id, type, action, description, parentTaskId, headers, false, null);
}

public CancellableTask(long id, String type, String action, String description, TaskId parentTaskId, Map<String, String> headers,
boolean cancelOnTimeout, TimeValue cancellationTimeout) {
super(id, type, action, description, parentTaskId, headers);
this.cancelOnTimeout = cancelOnTimeout;
this.cancellationTimeout = cancellationTimeout;
}

/**
Expand Down Expand Up @@ -77,6 +87,14 @@ public boolean isCancelled() {
return cancelled.get();
}

public boolean shouldCancelOnTimeout() {
return cancelOnTimeout;
}

public TimeValue getCancellationTimeout() {
return cancellationTimeout;
}

/**
* The reason the task was cancelled or null if it hasn't been cancelled.
*/
Expand Down

0 comments on commit 433f91f

Please sign in to comment.