HDFS-16082. Atomic operations on exceptionsSinceLastBalance and failedTimesSinceLastSuccessfulBalance in Balancer (#3127)

(cherry picked from commit d9fbb3c5082baf301b082f51eea2f8a2e25e8715)
This commit is contained in:
Viraj Jasani 2021-06-23 08:51:34 +05:30 committed by Wei-Chiu Chuang
parent 4825c7c28d
commit 68604b4cd0
No known key found for this signature in database
GPG Key ID: B362E1C021854B9D

View File

@ -35,6 +35,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hdfs.DFSUtilClient;
@ -204,8 +205,10 @@ public class Balancer {
@VisibleForTesting
private static volatile boolean serviceRunning = false;
private static volatile int exceptionsSinceLastBalance = 0;
private static volatile int failedTimesSinceLastSuccessfulBalance = 0;
private static final AtomicInteger EXCEPTIONS_SINCE_LAST_BALANCE =
new AtomicInteger(0);
private static final AtomicInteger
FAILED_TIMES_SINCE_LAST_SUCCESSFUL_BALANCE = new AtomicInteger(0);
private final Dispatcher dispatcher;
private final NameNodeConnector nnc;
@ -266,11 +269,11 @@ public class Balancer {
}
static int getExceptionsSinceLastBalance() {
return exceptionsSinceLastBalance;
return EXCEPTIONS_SINCE_LAST_BALANCE.get();
}
static int getFailedTimesSinceLastSuccessfulBalance() {
return failedTimesSinceLastSuccessfulBalance;
return FAILED_TIMES_SINCE_LAST_SUCCESSFUL_BALANCE.get();
}
/**
@ -816,20 +819,21 @@ public class Balancer {
int retCode = doBalance(namenodes, nsIds, p, conf);
if (retCode < 0) {
LOG.info("Balance failed, error code: " + retCode);
failedTimesSinceLastSuccessfulBalance++;
FAILED_TIMES_SINCE_LAST_SUCCESSFUL_BALANCE.incrementAndGet();
} else {
LOG.info("Balance succeed!");
failedTimesSinceLastSuccessfulBalance = 0;
FAILED_TIMES_SINCE_LAST_SUCCESSFUL_BALANCE.set(0);
}
exceptionsSinceLastBalance = 0;
EXCEPTIONS_SINCE_LAST_BALANCE.set(0);
} catch (Exception e) {
if (++exceptionsSinceLastBalance > retryOnException) {
if (EXCEPTIONS_SINCE_LAST_BALANCE.incrementAndGet()
> retryOnException) {
// The caller will process and log the exception
throw e;
}
LOG.warn(
"Encounter exception while do balance work. Already tried {} times",
exceptionsSinceLastBalance, e);
EXCEPTIONS_SINCE_LAST_BALANCE, e);
}
// sleep for next round, will retry for next round when it's interrupted