Skip to content

Commit

Permalink
Distinct interrupted from failed to start workspace metric (eclipse-c…
Browse files Browse the repository at this point in the history
…he#18258)

* Distinct interrupted from failed to start workspace metric

Signed-off-by: Sergii Kabashniuk <skabashniuk@redhat.com>
  • Loading branch information
skabashnyuk committed Nov 5, 2020
1 parent 6c03073 commit 7007453
Show file tree
Hide file tree
Showing 8 changed files with 275 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
*/
@Singleton
public class WorkspaceFailureMeterBinder implements MeterBinder {

private final EventService eventService;

private Counter startingStoppedFailureCounter;
Expand All @@ -50,10 +51,11 @@ public void bindTo(MeterRegistry registry) {
// only subscribe to the event once we have the counters ready
eventService.subscribe(
event -> {
if (event.getError() == null || event.getStatus() != WorkspaceStatus.STOPPED) {
if (event.getError() == null
|| event.getStatus() != WorkspaceStatus.STOPPED
|| event.isInitiatedByUser()) {
return;
}

Counter counter;
switch (event.getPrevStatus()) {
case STARTING:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright (c) 2012-2018 Red Hat, Inc.
* This program and the accompanying materials are made
* available under the terms of the Eclipse Public License 2.0
* which is available at https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Red Hat, Inc. - initial API and implementation
*/
package org.eclipse.che.api.metrics;

import static org.eclipse.che.api.metrics.WorkspaceBinders.workspaceMetric;

import com.google.inject.Inject;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.binder.MeterBinder;
import javax.inject.Singleton;
import org.eclipse.che.api.core.model.workspace.WorkspaceStatus;
import org.eclipse.che.api.core.notification.EventService;
import org.eclipse.che.api.workspace.shared.dto.event.WorkspaceStatusEvent;

/**
 * Meter binder that counts interrupted workspace start attempts.
 *
 * <p>A start attempt is counted as interrupted when a {@link WorkspaceStatusEvent} reports a
 * transition from {@code STARTING} to {@code STOPPED} and the event is flagged as initiated by
 * user.
 */
@Singleton
public class WorkspaceInterruptedStartAttemptsMeterBinder implements MeterBinder {

  private final EventService eventService;

  // assigned in bindTo(), before the event subscription is made
  private Counter interruptionCounter;

  @Inject
  public WorkspaceInterruptedStartAttemptsMeterBinder(EventService eventService) {
    this.eventService = eventService;
  }

  /**
   * Registers the interruption counter in the given registry and then subscribes to workspace
   * status events to keep the counter up to date.
   *
   * @param registry the registry the counter is registered in
   */
  @Override
  public void bindTo(MeterRegistry registry) {
    String metricName = workspaceMetric("start.interrupt.total");
    interruptionCounter =
        Counter.builder(metricName)
            .description("The count of workspace startup interruption")
            .register(registry);

    // the subscription comes last, so the counter already exists when the first event arrives
    eventService.subscribe(
        event -> {
          if (isInterruptedStart(event)) {
            interruptionCounter.increment();
          }
        },
        WorkspaceStatusEvent.class);
  }

  /**
   * Tells whether the event describes a workspace start that was interrupted.
   *
   * @param event the workspace status event to inspect
   * @return {@code true} if the workspace went from {@code STARTING} to {@code STOPPED} and the
   *     event is marked as initiated by user, {@code false} otherwise
   */
  private static boolean isInterruptedStart(WorkspaceStatusEvent event) {
    if (event.getPrevStatus() != WorkspaceStatus.STARTING) {
      return false;
    }
    if (event.getStatus() != WorkspaceStatus.STOPPED) {
      return false;
    }
    return event.isInitiatedByUser();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ protected void configure() {
meterMultibinder.addBinding().to(WorkspaceSuccessfulStartAttemptsMeterBinder.class);
meterMultibinder.addBinding().to(WorkspaceSuccessfulStopAttemptsMeterBinder.class);
meterMultibinder.addBinding().to(WorkspaceStartAttemptsMeterBinder.class);
meterMultibinder.addBinding().to(WorkspaceInterruptedStartAttemptsMeterBinder.class);
meterMultibinder.addBinding().to(UserMeterBinder.class);
meterMultibinder.addBinding().to(RuntimeLogMeterBinder.class);
meterMultibinder.addBinding().to(WorkspaceMeterBinder.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,24 @@ public void shouldNotCollectFailureWhenNoErrorInEvent(WorkspaceStatus prevStatus
failureCounters.forEach(c -> assertEquals(c.count(), 0d));
}

/**
 * A start that ends in STOPPED with the "initiated by user" flag set must leave every failure
 * counter untouched, even though the event carries an error message.
 */
@Test
public void shouldNotCollectInterruptedEvent() {
  // given: a STARTING -> STOPPED event with an error, flagged as initiated by user
  // when: the event is delivered to the captured subscriber
  events.onEvent(
      DtoFactory.newDto(WorkspaceStatusEvent.class)
          .withPrevStatus(WorkspaceStatus.STARTING)
          .withStatus(WorkspaceStatus.STOPPED)
          .withInitiatedByUser(true)
          .withError("interrupted")
          .withWorkspaceId("1"));

  // then: none of the failure counters has moved
  failureCounters.forEach(failureCounter -> assertEquals(failureCounter.count(), 0d));
}

@Test(dataProvider = "allStatusTransitionsWithoutToStopped")
public void shouldNotCollectFailureWhenNotTransitioningToStopped(
WorkspaceStatus from, WorkspaceStatus to) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
* Copyright (c) 2012-2018 Red Hat, Inc.
* This program and the accompanying materials are made
* available under the terms of the Eclipse Public License 2.0
* which is available at https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Red Hat, Inc. - initial API and implementation
*/
package org.eclipse.che.api.metrics;

import static java.util.Arrays.asList;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;

import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.simple.SimpleMeterRegistry;
import java.util.ArrayList;
import java.util.List;
import org.eclipse.che.api.core.model.workspace.WorkspaceStatus;
import org.eclipse.che.api.core.notification.EventService;
import org.eclipse.che.api.core.notification.EventSubscriber;
import org.eclipse.che.api.workspace.shared.dto.event.WorkspaceStatusEvent;
import org.eclipse.che.dto.server.DtoFactory;
import org.mockito.ArgumentCaptor;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

public class WorkspaceInterruptedStartAttemptsMeterBinderTest {

private EventSubscriber<WorkspaceStatusEvent> events;
private Counter interruptedCounter;

@BeforeMethod
public void setup() {
MeterRegistry registry = new SimpleMeterRegistry();

EventService eventService = mock(EventService.class);

WorkspaceInterruptedStartAttemptsMeterBinder meterBinder =
new WorkspaceInterruptedStartAttemptsMeterBinder(eventService);

meterBinder.bindTo(registry);

@SuppressWarnings("unchecked")
ArgumentCaptor<EventSubscriber<WorkspaceStatusEvent>> statusChangeEventCaptor =
ArgumentCaptor.forClass(EventSubscriber.class);

interruptedCounter = registry.find("che.workspace.start.interrupt.total").counter();

verify(eventService)
.subscribe(statusChangeEventCaptor.capture(), eq(WorkspaceStatusEvent.class));

events = statusChangeEventCaptor.getValue();
}

@Test
public void shouldCountWorkspaceInterruptedEvent() {
// given
WorkspaceStatusEvent event =
DtoFactory.newDto(WorkspaceStatusEvent.class)
.withPrevStatus(WorkspaceStatus.STARTING)
.withStatus(WorkspaceStatus.STOPPED)
.withInitiatedByUser(true)
.withError("interrupted")
.withWorkspaceId("1");

// when
events.onEvent(event);

// then
Assert.assertEquals(interruptedCounter.count(), 1.0);
}

@Test
public void shouldNotCountWorkspaceNonInterruptedEvent() {
// given
WorkspaceStatusEvent event =
DtoFactory.newDto(WorkspaceStatusEvent.class)
.withPrevStatus(WorkspaceStatus.STARTING)
.withStatus(WorkspaceStatus.STOPPED)
.withInitiatedByUser(false)
.withError("interrupted")
.withWorkspaceId("1");

// when
events.onEvent(event);

// then
Assert.assertEquals(interruptedCounter.count(), 0.0);
}

@Test(dataProvider = "allStatusTransitionsWithoutToStopped")
public void shouldNotCollectInterruptionWhenNotTransitioningToStopped(
WorkspaceStatus from, WorkspaceStatus to) {
// This really doesn't make much sense because the codebase always transitions the workspace
// to STOPPED on interruption. This is just a precaution that a potential bug in the
// rest of the codebase doesn't affect the metric collection ;)

events.onEvent(
DtoFactory.newDto(WorkspaceStatusEvent.class)
.withPrevStatus(from)
.withStatus(to)
.withInitiatedByUser(true)
.withError("D'oh!")
.withWorkspaceId("1"));

Assert.assertEquals(interruptedCounter.count(), 0.0);
}

@DataProvider
public Object[][] allStatusTransitionsWithoutToStopped() {
List<List<WorkspaceStatus>> transitions = new ArrayList<>(9);

for (WorkspaceStatus from : WorkspaceStatus.values()) {
for (WorkspaceStatus to : WorkspaceStatus.values()) {
if (from == to || to == WorkspaceStatus.STOPPED) {
continue;
}

transitions.add(asList(from, to));
}
}

return transitions.stream().map(List::toArray).toArray(Object[][]::new);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,9 @@ public interface WorkspaceStatusEvent {
Map<String, String> getOptions();

WorkspaceStatusEvent withOptions(Map<String, String> options);

/** @return whether the event was caused by a concrete user's request */
boolean isInitiatedByUser();

WorkspaceStatusEvent withInitiatedByUser(boolean isInitiatedByUser);
}
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ public CompletableFuture<Void> startAsync(
workspace.getId(),
sessionUserNameOr("undefined"));

publishWorkspaceStatusEvent(workspaceId, STARTING, STOPPED, null, options);
publishWorkspaceStatusEvent(workspaceId, STARTING, STOPPED, null, true, options);
return CompletableFuture.runAsync(
ThreadLocalPropagateContext.wrap(new StartRuntimeTask(workspace, options, runtime)),
sharedPool.getExecutor());
Expand Down Expand Up @@ -544,7 +544,8 @@ public CompletableFuture<Void> stopAsync(WorkspaceImpl workspace, Map<String, St
workspace.getName(),
workspace.getId(),
stoppedBy);
publishWorkspaceStatusEvent(workspaceId, STOPPING, status, options.get(WORKSPACE_STOP_REASON));
publishWorkspaceStatusEvent(
workspaceId, STOPPING, status, options.get(WORKSPACE_STOP_REASON), true);
return CompletableFuture.runAsync(
ThreadLocalPropagateContext.wrap(new StopRuntimeTask(workspace, options, stoppedBy)),
sharedPool.getExecutor());
Expand Down Expand Up @@ -784,7 +785,8 @@ InternalRuntime<?> recoverOne(RuntimeInfrastructure infra, RuntimeIdentity ident
identity.getWorkspaceId(),
STOPPED,
STOPPING,
"Workspace is stopped. Reason: " + x.getMessage());
"Workspace is stopped. Reason: " + x.getMessage(),
false);
throw new ServerException(
format(
"Couldn't recover runtime '%s:%s'. Error: %s",
Expand Down Expand Up @@ -830,21 +832,28 @@ private void subscribeAbnormalRuntimeStopListener() {
}

private void publishWorkspaceStatusEvent(
String workspaceId, WorkspaceStatus status, WorkspaceStatus previous, String errorMsg) {
publishWorkspaceStatusEvent(workspaceId, status, previous, errorMsg, emptyMap());
String workspaceId,
WorkspaceStatus status,
WorkspaceStatus previous,
String errorMsg,
boolean isInitiatedByUser) {
publishWorkspaceStatusEvent(
workspaceId, status, previous, errorMsg, isInitiatedByUser, emptyMap());
}

/**
 * Builds a {@link WorkspaceStatusEvent} from the given values and publishes it through the event
 * service.
 *
 * @param workspaceId identifier of the workspace the event is about
 * @param status the status set on the event
 * @param previous the previous status set on the event
 * @param errorMsg the error message set on the event; some callers pass {@code null}
 * @param isInitiatedByUser value of the event's "initiated by user" flag
 * @param options the options set on the event
 */
private void publishWorkspaceStatusEvent(
    String workspaceId,
    WorkspaceStatus status,
    WorkspaceStatus previous,
    String errorMsg,
    boolean isInitiatedByUser,
    Map<String, String> options) {
  WorkspaceStatusEvent statusEvent =
      DtoFactory.newDto(WorkspaceStatusEvent.class)
          .withWorkspaceId(workspaceId)
          .withPrevStatus(previous)
          .withError(errorMsg)
          .withInitiatedByUser(isInitiatedByUser)
          .withStatus(status)
          .withOptions(options);
  eventService.publish(statusEvent);
}
Expand Down Expand Up @@ -968,7 +977,7 @@ public void run() {
workspace.getName(),
workspaceId,
sessionUserNameOr("undefined"));
publishWorkspaceStatusEvent(workspaceId, RUNNING, STARTING, null);
publishWorkspaceStatusEvent(workspaceId, RUNNING, STARTING, null, true);
} catch (InfrastructureException e) {
try (Unlocker ignored = lockService.writeLock(workspaceId)) {
runtimes.remove(workspaceId);
Expand All @@ -978,9 +987,11 @@ public void run() {
probeScheduler.cancel(workspaceId);

String failureCause = "failed";
if (e instanceof RuntimeStartInterruptedException) {
boolean isWorkspaceStartInterrupted = e instanceof RuntimeStartInterruptedException;
if (isWorkspaceStartInterrupted) {
failureCause = "interrupted";
}

LOG.info(
"Workspace '{}:{}' with id '{}' start {}",
workspace.getNamespace(),
Expand All @@ -993,7 +1004,8 @@ public void run() {
if (e instanceof InternalInfrastructureException) {
LOG.error(e.getLocalizedMessage(), e);
}
publishWorkspaceStatusEvent(workspaceId, STOPPED, STARTING, e.getMessage());
publishWorkspaceStatusEvent(
workspaceId, STOPPED, STARTING, e.getMessage(), isWorkspaceStartInterrupted);
throw new RuntimeException(e);
}
}
Expand Down Expand Up @@ -1033,7 +1045,7 @@ public void run() {
workspace.getName(),
workspaceId,
stoppedBy);
publishWorkspaceStatusEvent(workspaceId, STOPPED, STOPPING, null);
publishWorkspaceStatusEvent(workspaceId, STOPPED, STOPPING, null, true);
} catch (ServerException | InfrastructureException e) {
// remove before firing an event to have consistency between state and the event
try (Unlocker ignored = lockService.writeLock(workspaceId)) {
Expand Down Expand Up @@ -1064,7 +1076,8 @@ public void run() {
workspaceId,
STOPPED,
STOPPING,
"Error occurs on workspace runtime stop. Error: " + e.getMessage());
"Error occurs on workspace runtime stop. Error: " + e.getMessage(),
false);
throw new RuntimeException(e);
}
}
Expand Down Expand Up @@ -1101,7 +1114,8 @@ public void onEvent(RuntimeAbnormalStoppingEvent event) {
workspaceId,
STOPPING,
previousStatus,
"Workspace is going to be STOPPED. Reason: " + event.getReason());
"Workspace is going to be STOPPED. Reason: " + event.getReason(),
false);
}
}

Expand Down Expand Up @@ -1139,7 +1153,8 @@ public void onEvent(RuntimeAbnormalStoppedEvent event) {
workspaceId,
STOPPED,
previousStatus,
"Workspace is stopped. Reason: " + event.getReason());
"Workspace is stopped. Reason: " + event.getReason(),
false);
setAbnormalStopAttributes(workspaceId, event.getReason());
}

Expand Down
Loading

0 comments on commit 7007453

Please sign in to comment.