HDFS-16139. Update BPServiceActor Scheduler's nextBlockReportTime atomically (#3228). Contributed by Viraj Jasani.

Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
This commit is contained in:
Viraj Jasani 2021-07-27 12:27:12 +05:30 committed by GitHub
parent 97c88c97de
commit b038042ece
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 25 deletions

View File

@ -39,6 +39,7 @@
import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
@ -323,10 +324,10 @@ private void connectToNNAndHandshake() throws IOException {
void triggerBlockReportForTests() { void triggerBlockReportForTests() {
synchronized (ibrManager) { synchronized (ibrManager) {
scheduler.scheduleHeartbeat(); scheduler.scheduleHeartbeat();
long oldBlockReportTime = scheduler.nextBlockReportTime; long oldBlockReportTime = scheduler.getNextBlockReportTime();
scheduler.forceFullBlockReportNow(); scheduler.forceFullBlockReportNow();
ibrManager.notifyAll(); ibrManager.notifyAll();
while (oldBlockReportTime == scheduler.nextBlockReportTime) { while (oldBlockReportTime == scheduler.getNextBlockReportTime()) {
try { try {
ibrManager.wait(100); ibrManager.wait(100);
} catch (InterruptedException e) { } catch (InterruptedException e) {
@ -1163,8 +1164,8 @@ static class Scheduler {
// nextBlockReportTime and nextHeartbeatTime may be assigned/read // nextBlockReportTime and nextHeartbeatTime may be assigned/read
// by testing threads (through BPServiceActor#triggerXXX), while also // by testing threads (through BPServiceActor#triggerXXX), while also
// assigned/read by the actor thread. // assigned/read by the actor thread.
@VisibleForTesting private final AtomicLong nextBlockReportTime =
volatile long nextBlockReportTime = monotonicNow(); new AtomicLong(monotonicNow());
@VisibleForTesting @VisibleForTesting
volatile long nextHeartbeatTime = monotonicNow(); volatile long nextHeartbeatTime = monotonicNow();
@ -1257,7 +1258,7 @@ boolean isLifelineDue(long startTime) {
} }
boolean isBlockReportDue(long curTime) { boolean isBlockReportDue(long curTime) {
return nextBlockReportTime - curTime <= 0; return nextBlockReportTime.get() - curTime <= 0;
} }
boolean isOutliersReportDue(long curTime) { boolean isOutliersReportDue(long curTime) {
@ -1281,15 +1282,15 @@ void forceFullBlockReportNow() {
long scheduleBlockReport(long delay, boolean isRegistration) { long scheduleBlockReport(long delay, boolean isRegistration) {
if (delay > 0) { // send BR after random delay if (delay > 0) { // send BR after random delay
// Numerical overflow is possible here and is okay. // Numerical overflow is possible here and is okay.
nextBlockReportTime = nextBlockReportTime.getAndSet(
monotonicNow() + ThreadLocalRandom.current().nextInt((int) (delay)); monotonicNow() + ThreadLocalRandom.current().nextInt((int) (delay)));
} else { // send at next heartbeat } else { // send at next heartbeat
nextBlockReportTime = monotonicNow(); nextBlockReportTime.getAndSet(monotonicNow());
} }
resetBlockReportTime = isRegistration; // reset future BRs for resetBlockReportTime = isRegistration; // reset future BRs for
// randomness, post first block report to avoid regular BRs from all // randomness, post first block report to avoid regular BRs from all
// DN's coming at one time. // DN's coming at one time.
return nextBlockReportTime; return nextBlockReportTime.get();
} }
/** /**
@ -1302,8 +1303,8 @@ void scheduleNextBlockReport() {
// If we have sent the first set of block reports, then wait a random // If we have sent the first set of block reports, then wait a random
// time before we start the periodic block reports. // time before we start the periodic block reports.
if (resetBlockReportTime) { if (resetBlockReportTime) {
nextBlockReportTime = monotonicNow() + nextBlockReportTime.getAndSet(monotonicNow() +
ThreadLocalRandom.current().nextInt((int)(blockReportIntervalMs)); ThreadLocalRandom.current().nextInt((int) (blockReportIntervalMs)));
resetBlockReportTime = false; resetBlockReportTime = false;
} else { } else {
/* say the last block report was at 8:20:14. The current report /* say the last block report was at 8:20:14. The current report
@ -1313,17 +1314,16 @@ void scheduleNextBlockReport() {
* 2) unexpected like 21:35:43, next report should be at 2:20:14 * 2) unexpected like 21:35:43, next report should be at 2:20:14
* on the next day. * on the next day.
*/ */
long factor = long factor = (monotonicNow() - nextBlockReportTime.get()
(monotonicNow() - nextBlockReportTime + blockReportIntervalMs) + blockReportIntervalMs) / blockReportIntervalMs;
/ blockReportIntervalMs;
if (factor != 0) { if (factor != 0) {
nextBlockReportTime += factor * blockReportIntervalMs; nextBlockReportTime.getAndAdd(factor * blockReportIntervalMs);
} else { } else {
// If the difference between the present time and the scheduled // If the difference between the present time and the scheduled
// time is very less, the factor can be 0, so in that case, we can // time is very less, the factor can be 0, so in that case, we can
// ignore that negligible time, spent while sending the BRss and // ignore that negligible time, spent while sending the BRss and
// schedule the next BR after the blockReportInterval. // schedule the next BR after the blockReportInterval.
nextBlockReportTime += blockReportIntervalMs; nextBlockReportTime.getAndAdd(blockReportIntervalMs);
} }
} }
} }
@ -1336,6 +1336,16 @@ long getLifelineWaitTime() {
return nextLifelineTime - monotonicNow(); return nextLifelineTime - monotonicNow();
} }
@VisibleForTesting
long getNextBlockReportTime() {
return nextBlockReportTime.get();
}
@VisibleForTesting
void setNextBlockReportTime(long nextBlockReportTime) {
this.nextBlockReportTime.getAndSet(nextBlockReportTime);
}
/** /**
* Wrapped for testing. * Wrapped for testing.
* @return * @return

View File

@ -31,6 +31,7 @@
import static java.lang.Math.abs; import static java.lang.Math.abs;
import static org.hamcrest.core.Is.is; import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat; import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
@ -70,7 +71,7 @@ public void testScheduleBlockReportImmediate() {
Scheduler scheduler = makeMockScheduler(now); Scheduler scheduler = makeMockScheduler(now);
scheduler.scheduleBlockReport(0, true); scheduler.scheduleBlockReport(0, true);
assertTrue(scheduler.resetBlockReportTime); assertTrue(scheduler.resetBlockReportTime);
assertThat(scheduler.nextBlockReportTime, is(now)); assertThat(scheduler.getNextBlockReportTime(), is(now));
} }
} }
@ -81,8 +82,8 @@ public void testScheduleBlockReportDelayed() {
final long delayMs = 10; final long delayMs = 10;
scheduler.scheduleBlockReport(delayMs, true); scheduler.scheduleBlockReport(delayMs, true);
assertTrue(scheduler.resetBlockReportTime); assertTrue(scheduler.resetBlockReportTime);
assertTrue(scheduler.nextBlockReportTime - now >= 0); assertTrue(scheduler.getNextBlockReportTime() - now >= 0);
assertTrue(scheduler.nextBlockReportTime - (now + delayMs) < 0); assertTrue(scheduler.getNextBlockReportTime() - (now + delayMs) < 0);
} }
} }
@ -96,7 +97,8 @@ public void testScheduleNextBlockReport() {
Scheduler scheduler = makeMockScheduler(now); Scheduler scheduler = makeMockScheduler(now);
assertTrue(scheduler.resetBlockReportTime); assertTrue(scheduler.resetBlockReportTime);
scheduler.scheduleNextBlockReport(); scheduler.scheduleNextBlockReport();
assertTrue(scheduler.nextBlockReportTime - (now + BLOCK_REPORT_INTERVAL_MS) < 0); assertTrue(scheduler.getNextBlockReportTime()
- (now + BLOCK_REPORT_INTERVAL_MS) < 0);
} }
} }
@ -110,7 +112,8 @@ public void testScheduleNextBlockReport2() {
Scheduler scheduler = makeMockScheduler(now); Scheduler scheduler = makeMockScheduler(now);
scheduler.resetBlockReportTime = false; scheduler.resetBlockReportTime = false;
scheduler.scheduleNextBlockReport(); scheduler.scheduleNextBlockReport();
assertThat(scheduler.nextBlockReportTime, is(now + BLOCK_REPORT_INTERVAL_MS)); assertThat(scheduler.getNextBlockReportTime(),
is(now + BLOCK_REPORT_INTERVAL_MS));
} }
} }
@ -129,10 +132,12 @@ public void testScheduleNextBlockReport3() {
final long blockReportDelay = final long blockReportDelay =
BLOCK_REPORT_INTERVAL_MS + random.nextInt(2 * (int) BLOCK_REPORT_INTERVAL_MS); BLOCK_REPORT_INTERVAL_MS + random.nextInt(2 * (int) BLOCK_REPORT_INTERVAL_MS);
final long origBlockReportTime = now - blockReportDelay; final long origBlockReportTime = now - blockReportDelay;
scheduler.nextBlockReportTime = origBlockReportTime; scheduler.setNextBlockReportTime(origBlockReportTime);
scheduler.scheduleNextBlockReport(); scheduler.scheduleNextBlockReport();
assertTrue(scheduler.nextBlockReportTime - now < BLOCK_REPORT_INTERVAL_MS); assertTrue((scheduler.getNextBlockReportTime() - now)
assertTrue(((scheduler.nextBlockReportTime - origBlockReportTime) % BLOCK_REPORT_INTERVAL_MS) == 0); < BLOCK_REPORT_INTERVAL_MS);
assertEquals(0, ((scheduler.getNextBlockReportTime() - origBlockReportTime)
% BLOCK_REPORT_INTERVAL_MS));
} }
} }
@ -201,7 +206,7 @@ private Scheduler makeMockScheduler(long now) {
HEARTBEAT_INTERVAL_MS, LIFELINE_INTERVAL_MS, HEARTBEAT_INTERVAL_MS, LIFELINE_INTERVAL_MS,
BLOCK_REPORT_INTERVAL_MS, OUTLIER_REPORT_INTERVAL_MS)); BLOCK_REPORT_INTERVAL_MS, OUTLIER_REPORT_INTERVAL_MS));
doReturn(now).when(mockScheduler).monotonicNow(); doReturn(now).when(mockScheduler).monotonicNow();
mockScheduler.nextBlockReportTime = now; mockScheduler.setNextBlockReportTime(now);
mockScheduler.nextHeartbeatTime = now; mockScheduler.nextHeartbeatTime = now;
mockScheduler.nextOutliersReportTime = now; mockScheduler.nextOutliersReportTime = now;
return mockScheduler; return mockScheduler;