Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Distinguish interrupted workspace starts from failed ones in the workspace metrics #18258

Merged
merged 5 commits into from
Nov 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
*/
@Singleton
public class WorkspaceFailureMeterBinder implements MeterBinder {

private final EventService eventService;

private Counter startingStoppedFailureCounter;
Expand All @@ -50,10 +51,11 @@ public void bindTo(MeterRegistry registry) {
// only subscribe to the event once we have the counters ready
eventService.subscribe(
event -> {
if (event.getError() == null || event.getStatus() != WorkspaceStatus.STOPPED) {
if (event.getError() == null
skabashnyuk marked this conversation as resolved.
Show resolved Hide resolved
|| event.getStatus() != WorkspaceStatus.STOPPED
|| event.isInitiatedByUser()) {
return;
}

Counter counter;
switch (event.getPrevStatus()) {
case STARTING:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright (c) 2012-2018 Red Hat, Inc.
* This program and the accompanying materials are made
* available under the terms of the Eclipse Public License 2.0
* which is available at https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Red Hat, Inc. - initial API and implementation
*/
package org.eclipse.che.api.metrics;

import static org.eclipse.che.api.metrics.WorkspaceBinders.workspaceMetric;

import com.google.inject.Inject;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.binder.MeterBinder;
import javax.inject.Singleton;
import org.eclipse.che.api.core.model.workspace.WorkspaceStatus;
import org.eclipse.che.api.core.notification.EventService;
import org.eclipse.che.api.workspace.shared.dto.event.WorkspaceStatusEvent;

/**
 * Counts workspace start attempts that were interrupted.
 *
 * <p>A start is counted as interrupted when a {@link WorkspaceStatusEvent} reports a transition
 * from {@link WorkspaceStatus#STARTING} to {@link WorkspaceStatus#STOPPED} and the event is
 * flagged as initiated by a user.
 */
@Singleton
public class WorkspaceInterruptedStartAttemptsMeterBinder implements MeterBinder {
  private final EventService eventService;

  private Counter interruptionCounter;

  @Inject
  public WorkspaceInterruptedStartAttemptsMeterBinder(EventService eventService) {
    this.eventService = eventService;
  }

  /**
   * Registers the interruption counter in the given registry and subscribes to workspace status
   * events.
   *
   * @param registry the registry the counter is registered in
   */
  @Override
  public void bindTo(MeterRegistry registry) {
    interruptionCounter =
        Counter.builder(workspaceMetric("start.interrupt.total"))
            .description("The count of workspace startup interruption")
            .register(registry);

    // the counter must exist before the first event can be delivered, so subscribe last
    eventService.subscribe(this::countIfStartInterrupted, WorkspaceStatusEvent.class);
  }

  /** Increments the counter only for user-initiated STARTING -> STOPPED transitions. */
  private void countIfStartInterrupted(WorkspaceStatusEvent event) {
    if (event.getPrevStatus() != WorkspaceStatus.STARTING) {
      return;
    }
    if (event.getStatus() != WorkspaceStatus.STOPPED) {
      return;
    }
    if (!event.isInitiatedByUser()) {
      return;
    }
    interruptionCounter.increment();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ protected void configure() {
meterMultibinder.addBinding().to(WorkspaceSuccessfulStartAttemptsMeterBinder.class);
meterMultibinder.addBinding().to(WorkspaceSuccessfulStopAttemptsMeterBinder.class);
meterMultibinder.addBinding().to(WorkspaceStartAttemptsMeterBinder.class);
meterMultibinder.addBinding().to(WorkspaceInterruptedStartAttemptsMeterBinder.class);
meterMultibinder.addBinding().to(UserMeterBinder.class);
meterMultibinder.addBinding().to(RuntimeLogMeterBinder.class);
meterMultibinder.addBinding().to(WorkspaceMeterBinder.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,24 @@ public void shouldNotCollectFailureWhenNoErrorInEvent(WorkspaceStatus prevStatus
failureCounters.forEach(c -> assertEquals(c.count(), 0d));
}

@Test
public void shouldNotCollectInterruptedEvent() {
  // when: a user-initiated STARTING -> STOPPED event carrying an error is delivered
  events.onEvent(
      DtoFactory.newDto(WorkspaceStatusEvent.class)
          .withPrevStatus(WorkspaceStatus.STARTING)
          .withStatus(WorkspaceStatus.STOPPED)
          .withInitiatedByUser(true)
          .withError("interrupted")
          .withWorkspaceId("1"));

  // then: none of the failure counters has been incremented
  failureCounters.forEach(failureCounter -> assertEquals(failureCounter.count(), 0d));
}

@Test(dataProvider = "allStatusTransitionsWithoutToStopped")
public void shouldNotCollectFailureWhenNotTransitioningToStopped(
WorkspaceStatus from, WorkspaceStatus to) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright (c) 2012-2018 Red Hat, Inc.
* This program and the accompanying materials are made
* available under the terms of the Eclipse Public License 2.0
* which is available at https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Red Hat, Inc. - initial API and implementation
*/
package org.eclipse.che.api.metrics;

import static java.util.Arrays.asList;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;

import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.simple.SimpleMeterRegistry;
import java.util.ArrayList;
import java.util.List;
import org.eclipse.che.api.core.model.workspace.WorkspaceStatus;
import org.eclipse.che.api.core.notification.EventService;
import org.eclipse.che.api.core.notification.EventSubscriber;
import org.eclipse.che.api.workspace.shared.dto.event.WorkspaceStatusEvent;
import org.eclipse.che.dto.server.DtoFactory;
import org.mockito.ArgumentCaptor;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

public class WorkspaceInterruptedStartAttemptsMeterBinderTest {

private EventSubscriber<WorkspaceStatusEvent> events;
private Counter interruptedCounter;

@BeforeMethod
public void setup() {
MeterRegistry registry = new SimpleMeterRegistry();

EventService eventService = mock(EventService.class);

WorkspaceInterruptedStartAttemptsMeterBinder meterBinder =
new WorkspaceInterruptedStartAttemptsMeterBinder(eventService);

meterBinder.bindTo(registry);

@SuppressWarnings("unchecked")
ArgumentCaptor<EventSubscriber<WorkspaceStatusEvent>> statusChangeEventCaptor =
ArgumentCaptor.forClass(EventSubscriber.class);

interruptedCounter = registry.find("che.workspace.start.interrupt.total").counter();

verify(eventService)
.subscribe(statusChangeEventCaptor.capture(), eq(WorkspaceStatusEvent.class));

events = statusChangeEventCaptor.getValue();
}

@Test
public void shouldCountWorkspaceInterruptedEvent() {
// given
WorkspaceStatusEvent event =
DtoFactory.newDto(WorkspaceStatusEvent.class)
.withPrevStatus(WorkspaceStatus.STARTING)
.withStatus(WorkspaceStatus.STOPPED)
.withInitiatedByUser(true)
.withError("interrupted")
.withWorkspaceId("1");

// when
events.onEvent(event);

// then
Assert.assertEquals(interruptedCounter.count(), 1.0);
}

@Test
public void shouldNotCountWorkspaceNonInterruptedEvent() {
// given
WorkspaceStatusEvent event =
DtoFactory.newDto(WorkspaceStatusEvent.class)
.withPrevStatus(WorkspaceStatus.STARTING)
.withStatus(WorkspaceStatus.STOPPED)
.withInitiatedByUser(false)
.withError("interrupted")
.withWorkspaceId("1");

// when
events.onEvent(event);

// then
Assert.assertEquals(interruptedCounter.count(), 0.0);
}

@Test(dataProvider = "allStatusTransitionsWithoutToStopped")
public void shouldNotCollectInterruptionWhenNotTransitioningToStopped(
WorkspaceStatus from, WorkspaceStatus to) {
// This really doesn't make much sense because the codebase always transitions the workspace
// to STOPPED on interruption. This is just a precaution that a potential bug in the
// rest of the codebase doesn't affect the metric collection ;)

events.onEvent(
DtoFactory.newDto(WorkspaceStatusEvent.class)
.withPrevStatus(from)
.withStatus(to)
.withInitiatedByUser(true)
.withError("D'oh!")
.withWorkspaceId("1"));

Assert.assertEquals(interruptedCounter.count(), 0.0);
}

@DataProvider
public Object[][] allStatusTransitionsWithoutToStopped() {
List<List<WorkspaceStatus>> transitions = new ArrayList<>(9);

for (WorkspaceStatus from : WorkspaceStatus.values()) {
for (WorkspaceStatus to : WorkspaceStatus.values()) {
if (from == to || to == WorkspaceStatus.STOPPED) {
continue;
}

transitions.add(asList(from, to));
}
}

return transitions.stream().map(List::toArray).toArray(Object[][]::new);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,9 @@ public interface WorkspaceStatusEvent {
Map<String, String> getOptions();

WorkspaceStatusEvent withOptions(Map<String, String> options);

/** @return whether the event was caused by a concrete user's request */
boolean isInitiatedByUser();

/**
 * Sets the flag reported by {@link #isInitiatedByUser()}.
 *
 * @param isInitiatedByUser whether the event was caused by a concrete user's request
 * @return a {@link WorkspaceStatusEvent}, so that further {@code with*} calls can be chained
 */
WorkspaceStatusEvent withInitiatedByUser(boolean isInitiatedByUser);
}
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ public CompletableFuture<Void> startAsync(
workspace.getId(),
sessionUserNameOr("undefined"));

publishWorkspaceStatusEvent(workspaceId, STARTING, STOPPED, null, options);
publishWorkspaceStatusEvent(workspaceId, STARTING, STOPPED, null, true, options);
return CompletableFuture.runAsync(
ThreadLocalPropagateContext.wrap(new StartRuntimeTask(workspace, options, runtime)),
sharedPool.getExecutor());
Expand Down Expand Up @@ -544,7 +544,8 @@ public CompletableFuture<Void> stopAsync(WorkspaceImpl workspace, Map<String, St
workspace.getName(),
workspace.getId(),
stoppedBy);
publishWorkspaceStatusEvent(workspaceId, STOPPING, status, options.get(WORKSPACE_STOP_REASON));
publishWorkspaceStatusEvent(
workspaceId, STOPPING, status, options.get(WORKSPACE_STOP_REASON), true);
return CompletableFuture.runAsync(
ThreadLocalPropagateContext.wrap(new StopRuntimeTask(workspace, options, stoppedBy)),
sharedPool.getExecutor());
Expand Down Expand Up @@ -784,7 +785,8 @@ InternalRuntime<?> recoverOne(RuntimeInfrastructure infra, RuntimeIdentity ident
identity.getWorkspaceId(),
STOPPED,
STOPPING,
"Workspace is stopped. Reason: " + x.getMessage());
"Workspace is stopped. Reason: " + x.getMessage(),
false);
throw new ServerException(
format(
"Couldn't recover runtime '%s:%s'. Error: %s",
Expand Down Expand Up @@ -830,21 +832,28 @@ private void subscribeAbnormalRuntimeStopListener() {
}

private void publishWorkspaceStatusEvent(
String workspaceId, WorkspaceStatus status, WorkspaceStatus previous, String errorMsg) {
publishWorkspaceStatusEvent(workspaceId, status, previous, errorMsg, emptyMap());
String workspaceId,
WorkspaceStatus status,
WorkspaceStatus previous,
String errorMsg,
boolean isInitiatedByUser) {
publishWorkspaceStatusEvent(
workspaceId, status, previous, errorMsg, isInitiatedByUser, emptyMap());
}

/**
 * Builds a {@link WorkspaceStatusEvent} from the given values and publishes it through the event
 * service.
 *
 * @param workspaceId id of the workspace the event is about
 * @param status the status the workspace has moved to
 * @param previous the status the workspace had before
 * @param errorMsg error message to attach to the event, may be {@code null}
 * @param isInitiatedByUser whether the status change was caused by a user's request
 * @param options options to attach to the event
 */
private void publishWorkspaceStatusEvent(
    String workspaceId,
    WorkspaceStatus status,
    WorkspaceStatus previous,
    String errorMsg,
    boolean isInitiatedByUser,
    Map<String, String> options) {
  WorkspaceStatusEvent statusEvent =
      DtoFactory.newDto(WorkspaceStatusEvent.class)
          .withWorkspaceId(workspaceId)
          .withPrevStatus(previous)
          .withError(errorMsg)
          .withInitiatedByUser(isInitiatedByUser)
          .withStatus(status)
          .withOptions(options);
  eventService.publish(statusEvent);
}
Expand Down Expand Up @@ -968,7 +977,7 @@ public void run() {
workspace.getName(),
workspaceId,
sessionUserNameOr("undefined"));
publishWorkspaceStatusEvent(workspaceId, RUNNING, STARTING, null);
publishWorkspaceStatusEvent(workspaceId, RUNNING, STARTING, null, true);
} catch (InfrastructureException e) {
try (Unlocker ignored = lockService.writeLock(workspaceId)) {
runtimes.remove(workspaceId);
Expand All @@ -978,9 +987,11 @@ public void run() {
probeScheduler.cancel(workspaceId);

String failureCause = "failed";
if (e instanceof RuntimeStartInterruptedException) {
boolean isWorkspaceStartInterrupted = e instanceof RuntimeStartInterruptedException;
if (isWorkspaceStartInterrupted) {
failureCause = "interrupted";
}

LOG.info(
"Workspace '{}:{}' with id '{}' start {}",
workspace.getNamespace(),
Expand All @@ -993,7 +1004,8 @@ public void run() {
if (e instanceof InternalInfrastructureException) {
LOG.error(e.getLocalizedMessage(), e);
}
publishWorkspaceStatusEvent(workspaceId, STOPPED, STARTING, e.getMessage());
publishWorkspaceStatusEvent(
workspaceId, STOPPED, STARTING, e.getMessage(), isWorkspaceStartInterrupted);
throw new RuntimeException(e);
}
}
Expand Down Expand Up @@ -1033,7 +1045,7 @@ public void run() {
workspace.getName(),
workspaceId,
stoppedBy);
publishWorkspaceStatusEvent(workspaceId, STOPPED, STOPPING, null);
publishWorkspaceStatusEvent(workspaceId, STOPPED, STOPPING, null, true);
} catch (ServerException | InfrastructureException e) {
// remove before firing an event to have consistency between state and the event
try (Unlocker ignored = lockService.writeLock(workspaceId)) {
Expand Down Expand Up @@ -1064,7 +1076,8 @@ public void run() {
workspaceId,
STOPPED,
STOPPING,
"Error occurs on workspace runtime stop. Error: " + e.getMessage());
"Error occurs on workspace runtime stop. Error: " + e.getMessage(),
false);
throw new RuntimeException(e);
}
}
Expand Down Expand Up @@ -1101,7 +1114,8 @@ public void onEvent(RuntimeAbnormalStoppingEvent event) {
workspaceId,
STOPPING,
previousStatus,
"Workspace is going to be STOPPED. Reason: " + event.getReason());
"Workspace is going to be STOPPED. Reason: " + event.getReason(),
false);
}
}

Expand Down Expand Up @@ -1139,7 +1153,8 @@ public void onEvent(RuntimeAbnormalStoppedEvent event) {
workspaceId,
STOPPED,
previousStatus,
"Workspace is stopped. Reason: " + event.getReason());
"Workspace is stopped. Reason: " + event.getReason(),
false);
setAbnormalStopAttributes(workspaceId, event.getReason());
}

Expand Down
Loading