HADOOP-18325. ABFS: Add correlated metric support for ABFS operations #4564

Closed · wants to merge 49 commits

Changes from all commits (49 commits)
98d2901
Metric
anmolasrani123 May 12, 2022
16612d3
Metric changes
anmolasrani123 May 16, 2022
fb2295e
Metric changes
anmolasrani123 May 20, 2022
986d2e6
Metrics
anmolasrani123 May 27, 2022
23d9b47
Added final metrics
anmolasrani123 Jun 6, 2022
d57a746
Checkstyle fixes
anmolasrani123 Jun 7, 2022
4476d33
Added new params to metrics
anmolasrani123 Jun 14, 2022
d7b6be8
Incorrect total requests fix
anmolasrani123 Jun 21, 2022
324d2c0
Adding thread safety for metrics update
anmolasrani123 Jun 23, 2022
b9273d3
Made the metric enablement configurable
anmolasrani123 Jul 5, 2022
5fc1417
Removed hardcoded value for maxIoRetries
anmolasrani123 Jul 6, 2022
b185269
Fix null pointer exceptions
anmolasrani123 Jul 7, 2022
ef9691a
Fix for acronyms
anmolasrani123 Jul 8, 2022
3bddb11
Fix for spacing issues
anmolasrani123 Jul 11, 2022
4afba69
Added documentation for configs
anmolasrani123 Jul 12, 2022
0edb368
Update variable name
anmolasrani123 Jul 12, 2022
791b84e
Added config for URI
anmolasrani123 Jul 12, 2022
f6bad55
Test for max retries
anmolasrani123 Jul 13, 2022
9230bd8
Add test case for retry
anmolasrani123 Jul 13, 2022
e1c4c61
Refactored the test class
anmolasrani123 Jul 14, 2022
07aa9af
Merge branch 'metrics' of https://github.com/anmolanmol1234/hadoop in…
anmolasrani123 Jul 14, 2022
7686cce
Added comments
anmolasrani123 Jul 14, 2022
f9de500
Merge branch 'metrics' of https://github.com/anmolanmol1234/hadoop in…
anmolasrani123 Jul 14, 2022
f66bfe3
Back to previous configs
anmolasrani123 Jul 14, 2022
d0c0f68
remove test case
anmolasrani123 Jul 14, 2022
637060f
remove test case
anmolasrani123 Jul 14, 2022
a477400
remove test case
anmolasrani123 Jul 14, 2022
0f44a9b
Fix for checkstyle errors
anmolasrani123 Jul 15, 2022
312853c
Checkstyle error fixes
anmolasrani123 Jul 18, 2022
bca9c4e
Spotbugs fixes
anmolasrani123 Jul 18, 2022
4e99643
Merge branch 'apache:trunk' into HADOOP-18325
anmolanmol1234 Jul 25, 2022
15337ff
Added support for read footer metrics
saxenapranav Jul 28, 2022
5706ca0
fix for file size
anmolasrani123 Aug 2, 2022
3abca57
Checkstyle fixes
anmolasrani123 Aug 3, 2022
4889c59
Merge branch 'HADOOP-18325' into HADOOP-18325_footerMetrics
anmolasrani123 Aug 3, 2022
6cc7d37
Reformating and refactoring the code
anmolasrani123 Aug 3, 2022
05acafc
Update documentation
anmolasrani123 Aug 3, 2022
388d837
Checkstyle fixes
anmolasrani123 Aug 4, 2022
da41836
Added new tracing class for metrics
anmolasrani123 Aug 8, 2022
d55aa47
Additional tracing header formats
anmolasrani123 Aug 10, 2022
e26ee5f
Test for footer metrics
anmolasrani123 Aug 12, 2022
5f680d0
Test for footer metrics
anmolasrani123 Aug 12, 2022
a0d3ef9
Checkstyle fixes
anmolasrani123 Aug 12, 2022
63abb7f
Tracing header fix
anmolasrani123 Aug 12, 2022
eb381e1
Removing unused fields
anmolasrani123 Aug 16, 2022
386da87
Merge branch 'apache:trunk' into HADOOP-18325_footerMetrics
anmolanmol1234 Aug 16, 2022
40868de
Removing unused fields
anmolasrani123 Aug 16, 2022
1f6827f
Merge branch 'apache:trunk' into HADOOP-18325
anmolanmol1234 Oct 7, 2022
d22b897
Merge branch 'apache:trunk' into HADOOP-18325
anmolanmol1234 Oct 19, 2022
10 changes: 10 additions & 0 deletions hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml
@@ -44,6 +44,16 @@
<suppressions>
    <suppress checks="ParameterNumber|MagicNumber"
              files="org[\\/]apache[\\/]hadoop[\\/]fs[\\/]azurebfs[\\/]AzureBlobFileSystemStore.java"/>
    <suppress checks="MagicNumber"
              files="org[\\/]apache[\\/]hadoop[\\/]fs[\\/]azurebfs[\\/]AbfsDriverMetrics.java"/>
    <suppress checks="MagicNumber"
              files="org[\\/]apache[\\/]hadoop[\\/]fs[\\/]azurebfs[\\/]AbfsBackoffMetrics.java"/>
    <suppress checks="MagicNumber"
              files="org[\\/]apache[\\/]hadoop[\\/]fs[\\/]azurebfs[\\/]services[\\/]AbfsRestOperation.java"/>
    <suppress checks="MagicNumber"
              files="org[\\/]apache[\\/]hadoop[\\/]fs[\\/]azurebfs[\\/]services[\\/]AbfsInputStream.java"/>
    <suppress checks="MagicNumber"
              files="org[\\/]apache[\\/]hadoop[\\/]fs[\\/]azurebfs[\\/]services[\\/]TestAbfsRestOperation.java"/>
    <suppress checks="ParameterNumber|MagicNumber"
              files="org[\\/]apache[\\/]hadoop[\\/]fs[\\/]azurebfs[\\/]utils[\\/]Base64.java"/>
    <suppress checks="ParameterNumber|VisibilityModifier"
@@ -0,0 +1,217 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package org.apache.hadoop.fs.azurebfs;

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.Map;
import java.util.ArrayList;
import java.util.Arrays;

public class AbfsBackoffMetrics {

  private AtomicLong numberOfRequestsSucceeded;

  private AtomicLong minBackoff;

  private AtomicLong maxBackoff;

  private AtomicLong totalRequests;

  private AtomicLong totalBackoff;

  private String retryCount;

  private AtomicLong numberOfIOPSThrottledRequests;

  private AtomicLong numberOfBandwidthThrottledRequests;

  private AtomicLong numberOfOtherThrottledRequests;

  private AtomicLong numberOfNetworkFailedRequests;

  private AtomicLong maxRetryCount;

  private AtomicLong totalNumberOfRequests;

  private AtomicLong numberOfRequestsSucceededWithoutRetrying;

  private AtomicLong numberOfRequestsFailed;

  private final Map<String, AbfsBackoffMetrics> metricsMap
      = new ConcurrentHashMap<>();

  public AbfsBackoffMetrics() {
    initializeMap();
    this.numberOfIOPSThrottledRequests = new AtomicLong();
    this.numberOfBandwidthThrottledRequests = new AtomicLong();
    this.numberOfOtherThrottledRequests = new AtomicLong();
    this.totalNumberOfRequests = new AtomicLong();
    this.maxRetryCount = new AtomicLong();
    this.numberOfRequestsSucceededWithoutRetrying = new AtomicLong();
    this.numberOfRequestsFailed = new AtomicLong();
    this.numberOfNetworkFailedRequests = new AtomicLong();
  }

  public AbfsBackoffMetrics(String retryCount) {
    this.retryCount = retryCount;
    this.numberOfRequestsSucceeded = new AtomicLong();
    this.minBackoff = new AtomicLong(Long.MAX_VALUE);
    this.maxBackoff = new AtomicLong();
    this.totalRequests = new AtomicLong();
    this.totalBackoff = new AtomicLong();
  }

  private void initializeMap() {
    ArrayList<String> retryCountList = new ArrayList<String>(
        Arrays.asList("1", "2", "3", "4", "5_15", "15_25", "25AndAbove"));
    for (String s : retryCountList) {
      metricsMap.put(s, new AbfsBackoffMetrics(s));
    }
  }

  public AtomicLong getNumberOfRequestsSucceeded() {
    return numberOfRequestsSucceeded;
  }

  public AtomicLong getMinBackoff() {
    return minBackoff;
  }

  public AtomicLong getMaxBackoff() {
    return maxBackoff;
  }

  public AtomicLong getTotalRequests() {
    return totalRequests;
  }

  public AtomicLong getTotalBackoff() {
    return totalBackoff;
  }

  public String getRetryCount() {
    return retryCount;
  }

  public AtomicLong getNumberOfIOPSThrottledRequests() {
    return numberOfIOPSThrottledRequests;
  }

  public AtomicLong getNumberOfBandwidthThrottledRequests() {
    return numberOfBandwidthThrottledRequests;
  }

  public AtomicLong getNumberOfOtherThrottledRequests() {
    return numberOfOtherThrottledRequests;
  }

  public AtomicLong getMaxRetryCount() {
    return maxRetryCount;
  }

  public AtomicLong getTotalNumberOfRequests() {
    return totalNumberOfRequests;
  }

  public Map<String, AbfsBackoffMetrics> getMetricsMap() {
    return metricsMap;
  }

  public AtomicLong getNumberOfRequestsSucceededWithoutRetrying() {
    return numberOfRequestsSucceededWithoutRetrying;
  }

  public AtomicLong getNumberOfRequestsFailed() {
    return numberOfRequestsFailed;
  }

  public AtomicLong getNumberOfNetworkFailedRequests() {
    return numberOfNetworkFailedRequests;
  }

  /*
  Acronyms :-
  1.RCTSI :- Request count that succeeded in x retries
  2.MMA :- Min Max Average (This refers to the backoff or sleep time between 2 requests)
  3.s :- seconds
  4.BWT :- Number of Bandwidth throttled requests
  5.IT :- Number of IOPS throttled requests
  6.OT :- Number of Other throttled requests
  7.NFR :- Number of requests which failed due to network errors
  8.%RT :- Percentage of requests that are throttled
  9.TRNR :- Total number of requests which succeeded without retrying
  10.TRF :- Total number of requests which failed
  11.TR :- Total number of requests which were made
  12.MRC :- Max retry count across all requests
  */
  @Override
  public String toString() {
    StringBuilder metricString = new StringBuilder();
    long totalRequestsThrottled = numberOfBandwidthThrottledRequests.get()
        + numberOfIOPSThrottledRequests.get()
        + numberOfOtherThrottledRequests.get();
    double percentageOfRequestsThrottled =
        ((double) totalRequestsThrottled / totalNumberOfRequests.get()) * 100;
    for (Map.Entry<String, AbfsBackoffMetrics> entry : metricsMap.entrySet()) {
      metricString.append("#RCTSI#_").append(entry.getKey())
          .append("R_").append("=")
          .append(entry.getValue().getNumberOfRequestsSucceeded()).append(" ");
      long totalRequests = entry.getValue().getTotalRequests().get();
      if (totalRequests > 0) {
        metricString.append("#MMA#_").append(entry.getKey())
            .append("R_").append("=")
            .append(String.format("%.3f",
                (double) entry.getValue().getMinBackoff().get() / 1000L))
            .append("s ")
            .append(String.format("%.3f",
                (double) entry.getValue().getMaxBackoff().get() / 1000L))
            .append("s ")
            .append(String.format("%.3f",
                ((double) entry.getValue().getTotalBackoff().get() / totalRequests)
                    / 1000L))
            .append("s ");
      } else {
        metricString.append("#MMA#_").append(entry.getKey())
            .append("R_").append("=0s ");
      }
    }
    metricString.append("#BWT=")
        .append(numberOfBandwidthThrottledRequests)
        .append(" #IT=")
        .append(numberOfIOPSThrottledRequests)
        .append(" #OT=")
        .append(numberOfOtherThrottledRequests)
        .append(" #%RT=")
        .append(String.format("%.3f", percentageOfRequestsThrottled))
        .append(" #NFR=")
        .append(numberOfNetworkFailedRequests)
        .append(" #TRNR=")
        .append(numberOfRequestsSucceededWithoutRetrying)
        .append(" #TRF=")
        .append(numberOfRequestsFailed)
        .append(" #TR=")
        .append(totalNumberOfRequests)
        .append(" #MRC=")
        .append(maxRetryCount);

    return metricString + " ";
  }
}
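Reviewer note: a minimal sketch of how AbfsBackoffMetrics could be exercised and what its toString() output looks like. The class in this change only exposes getters; the mutation calls below (incrementAndGet, set, addAndGet on the returned AtomicLongs) and the choice of the "2" retry bucket are illustrative assumptions about intended usage, not code from this PR.

  import org.apache.hadoop.fs.azurebfs.AbfsBackoffMetrics;

  public class AbfsBackoffMetricsExample {
    public static void main(String[] args) {
      AbfsBackoffMetrics metrics = new AbfsBackoffMetrics();

      // Record one request that succeeded after landing in the "2"-retries bucket,
      // with a 500 ms backoff (values are placeholders).
      AbfsBackoffMetrics bucket = metrics.getMetricsMap().get("2");
      bucket.getNumberOfRequestsSucceeded().incrementAndGet();
      bucket.getTotalRequests().incrementAndGet();
      bucket.getMinBackoff().set(500L);
      bucket.getMaxBackoff().set(500L);
      bucket.getTotalBackoff().addAndGet(500L);

      // Aggregate counters kept on the account-level instance.
      metrics.getTotalNumberOfRequests().incrementAndGet();
      metrics.getNumberOfBandwidthThrottledRequests().incrementAndGet();

      // Prints the per-bucket RCTSI/MMA entries followed by the aggregate
      // BWT/IT/OT/%RT/NFR/TRNR/TRF/TR/MRC fields described in the comment above, e.g.
      // "... #RCTSI#_2R_=1 #MMA#_2R_=0.500s 0.500s 0.500s ... #BWT=1 #IT=0 ... #TR=1 #MRC=0 "
      System.out.println(metrics);
    }
  }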

@@ -759,6 +759,13 @@ public TracingHeaderFormat getTracingHeaderFormat() {
    return getEnum(FS_AZURE_TRACINGHEADER_FORMAT, TracingHeaderFormat.ALL_ID_FORMAT);
  }

  /**
   * Enum config to allow the user to pick the format of the x-ms-client-request-id
   * header used for metric reporting.
   * @return tracingMetricHeaderFormat config if valid, else default EMPTY
   */
  public TracingHeaderFormat getTracingMetricHeaderFormat() {
    return getEnum(FS_AZURE_TRACINGMETRICHEADER_FORMAT, TracingHeaderFormat.EMPTY);
  }

  public AuthType getAuthType(String accountName) {
    return getEnum(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.SharedKey);
  }
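Reviewer note: a hedged sketch of consuming the new getter. The helper class, its name, and the idea of gating metric-header emission on the EMPTY default are assumptions about intended usage rather than code from this PR; import paths follow the existing ABFS package layout.

  import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
  import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat;

  // Hypothetical helper, not part of this change.
  final class MetricHeaderCheck {
    private MetricHeaderCheck() {
    }

    // getTracingMetricHeaderFormat() defaults to TracingHeaderFormat.EMPTY, so metric
    // headers stay disabled unless the user explicitly configures another format.
    static boolean metricHeaderEnabled(AbfsConfiguration abfsConfiguration) {
      return abfsConfiguration.getTracingMetricHeaderFormat() != TracingHeaderFormat.EMPTY;
    }
  }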
@@ -21,7 +21,8 @@
import java.net.URI;
import java.util.Map;
import java.util.UUID;

import java.util.List;
import java.util.ArrayList;
import org.apache.hadoop.classification.VisibleForTesting;

import org.apache.hadoop.fs.azurebfs.services.AbfsCounters;
@@ -33,9 +34,10 @@
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
import org.apache.hadoop.metrics2.lib.MutableMetric;

import org.apache.hadoop.fs.azurebfs.services.AbfsReadFooterMetrics;
import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.*;
import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.iostatisticsStore;
import java.util.concurrent.atomic.AtomicReference;

/**
 * Instrumentation of Abfs counters.
@@ -63,6 +65,10 @@ public class AbfsCountersImpl implements AbfsCounters {

  private final IOStatisticsStore ioStatisticsStore;

  private AtomicReference<AbfsBackoffMetrics> abfsBackoffMetrics = null;

  private List<AbfsReadFooterMetrics> readFooterMetricsList;

  private static final AbfsStatistic[] STATISTIC_LIST = {
      CALL_CREATE,
      CALL_OPEN,
@@ -121,6 +127,8 @@ public AbfsCountersImpl(URI uri) {
      ioStatisticsStoreBuilder.withDurationTracking(durationStats.getStatName());
    }
    ioStatisticsStore = ioStatisticsStoreBuilder.build();
    abfsBackoffMetrics = new AtomicReference<>(new AbfsBackoffMetrics());
    readFooterMetricsList = new ArrayList<>();
  }

  /**
@@ -188,6 +196,14 @@ private MetricsRegistry getRegistry() {
    return registry;
  }

  public AbfsBackoffMetrics getAbfsBackoffMetrics() {
    return abfsBackoffMetrics.get();
  }

  public List<AbfsReadFooterMetrics> getAbfsReadFooterMetrics() {
    return readFooterMetricsList;
  }

  /**
   * {@inheritDoc}
   *
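Reviewer note: to tie the hunks above together, a minimal sketch of reading the new metric objects back out of AbfsCountersImpl. The URI value is a placeholder, the example class is not part of this PR, the package location of AbfsCountersImpl is assumed from the imports visible in the diff, and how the driver actually aggregates and ships these strings is outside the hunks shown here.

  import java.net.URI;
  import java.util.List;

  import org.apache.hadoop.fs.azurebfs.AbfsBackoffMetrics;
  import org.apache.hadoop.fs.azurebfs.AbfsCountersImpl;
  import org.apache.hadoop.fs.azurebfs.services.AbfsReadFooterMetrics;

  public class AbfsCountersMetricsExample {
    public static void main(String[] args) throws Exception {
      // Placeholder filesystem URI; any abfs:// URI works for constructing the counters.
      AbfsCountersImpl counters =
          new AbfsCountersImpl(new URI("abfs://container@account.dfs.core.windows.net"));

      // Backoff metrics are allocated unconditionally in the constructor shown above.
      AbfsBackoffMetrics backoffMetrics = counters.getAbfsBackoffMetrics();

      // Read-footer metrics start out as an empty list; entries are added elsewhere in the PR.
      List<AbfsReadFooterMetrics> footerMetrics = counters.getAbfsReadFooterMetrics();

      System.out.println("Backoff metrics: " + backoffMetrics);
      System.out.println("Read footer metric entries: " + footerMetrics.size());
    }
  }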