HDFS-3828. Block Scanner rescans blocks too frequently. Contributed by Andy Isaacson

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1381472 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Eli Collins 2012-09-06 06:19:30 +00:00
parent 02c0317eff
commit 01db4d2bb2
4 changed files with 127 additions and 6 deletions

View File

@ -726,6 +726,9 @@ Branch-2 ( Unreleased changes )
HDFS-1490. TransferFSImage should timeout (Dmytro Molkov and Vinay via todd) HDFS-1490. TransferFSImage should timeout (Dmytro Molkov and Vinay via todd)
HDFS-3828. Block Scanner rescans blocks too frequently.
(Andy Isaacson via eli)
BREAKDOWN OF HDFS-3042 SUBTASKS BREAKDOWN OF HDFS-3042 SUBTASKS
HDFS-2185. HDFS portion of ZK-based FailoverController (todd) HDFS-2185. HDFS portion of ZK-based FailoverController (todd)

View File

@ -51,6 +51,8 @@
import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Time;
import com.google.common.annotations.VisibleForTesting;
/** /**
* Scans the block files under a block pool and verifies that the * Scans the block files under a block pool and verifies that the
* files are not corrupt. * files are not corrupt.
@ -255,6 +257,11 @@ synchronized void deleteBlock(Block block) {
} }
} }
/** @return the current value of the {@code totalScans} counter; exposed for tests. */
@VisibleForTesting
long getTotalScans() {
  return this.totalScans;
}
/** @return the last scan time for the block pool. */ /** @return the last scan time for the block pool. */
long getLastScanTime() { long getLastScanTime() {
return lastScanTime.get(); return lastScanTime.get();
@ -563,7 +570,24 @@ private synchronized void startNewPeriod() {
currentPeriodStart = Time.now(); currentPeriodStart = Time.now();
} }
/**
 * Tells the caller whether a scan pass should run right now.
 *
 * A pass is skipped only when both conditions hold: no bytes are left to
 * scan ({@code bytesLeft <= 0}) and the current period has not yet elapsed
 * ({@code Time.now() < currentPeriodStart + scanPeriod}).
 *
 * @return {@code true} if scanning should proceed, {@code false} if the
 *         scan should be skipped for now
 */
private synchronized boolean workRemainingInCurrentPeriod() {
  // Either there are bytes still to scan, or the period is over: proceed.
  if (bytesLeft > 0 || Time.now() >= currentPeriodStart + scanPeriod) {
    return true;
  }

  // Nothing left for this period; record why the scan is being skipped.
  if (LOG.isDebugEnabled()) {
    LOG.debug("Skipping scan since bytesLeft=" + bytesLeft + ", Start=" +
        currentPeriodStart + ", period=" + scanPeriod + ", now=" +
        Time.now() + " " + blockPoolId);
  }
  return false;
}
void scanBlockPoolSlice() { void scanBlockPoolSlice() {
if (!workRemainingInCurrentPeriod()) {
return;
}
// Create a new processedBlocks structure // Create a new processedBlocks structure
processedBlocks = new HashMap<Long, Integer>(); processedBlocks = new HashMap<Long, Integer>();
if (!assignInitialVerificationTimes()) { if (!assignInitialVerificationTimes()) {
@ -608,14 +632,14 @@ private void scan() {
LOG.warn("RuntimeException during BlockPoolScanner.scan()", e); LOG.warn("RuntimeException during BlockPoolScanner.scan()", e);
throw e; throw e;
} finally { } finally {
cleanUp(); rollVerificationLogs();
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("Done scanning block pool: " + blockPoolId); LOG.debug("Done scanning block pool: " + blockPoolId);
} }
} }
} }
private synchronized void cleanUp() { private synchronized void rollVerificationLogs() {
if (verificationLog != null) { if (verificationLog != null) {
try { try {
verificationLog.logs.roll(); verificationLog.logs.roll();

View File

@ -34,6 +34,8 @@
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
import com.google.common.annotations.VisibleForTesting;
/** /**
* DataBlockScanner manages block scanning for all the block pools. For each * DataBlockScanner manages block scanning for all the block pools. For each
* block pool a {@link BlockPoolSliceScanner} is created which runs in a separate * block pool a {@link BlockPoolSliceScanner} is created which runs in a separate
@ -47,6 +49,8 @@ public class DataBlockScanner implements Runnable {
private final FsDatasetSpi<? extends FsVolumeSpi> dataset; private final FsDatasetSpi<? extends FsVolumeSpi> dataset;
private final Configuration conf; private final Configuration conf;
static final int SLEEP_PERIOD_MS = 5 * 1000;
/** /**
* Map to find the BlockPoolScanner for a given block pool id. This is updated * Map to find the BlockPoolScanner for a given block pool id. This is updated
* when a BPOfferService becomes alive or dies. * when a BPOfferService becomes alive or dies.
@ -68,10 +72,10 @@ public void run() {
String currentBpId = ""; String currentBpId = "";
boolean firstRun = true; boolean firstRun = true;
while (datanode.shouldRun && !Thread.interrupted()) { while (datanode.shouldRun && !Thread.interrupted()) {
//Sleep everytime except in the first interation. //Sleep everytime except in the first iteration.
if (!firstRun) { if (!firstRun) {
try { try {
Thread.sleep(5000); Thread.sleep(SLEEP_PERIOD_MS);
} catch (InterruptedException ex) { } catch (InterruptedException ex) {
// Interrupt itself again to set the interrupt status // Interrupt itself again to set the interrupt status
blockScannerThread.interrupt(); blockScannerThread.interrupt();
@ -103,7 +107,7 @@ private void waitForInit() {
while ((getBlockPoolSetSize() < datanode.getAllBpOs().length) while ((getBlockPoolSetSize() < datanode.getAllBpOs().length)
|| (getBlockPoolSetSize() < 1)) { || (getBlockPoolSetSize() < 1)) {
try { try {
Thread.sleep(5000); Thread.sleep(SLEEP_PERIOD_MS);
} catch (InterruptedException e) { } catch (InterruptedException e) {
blockScannerThread.interrupt(); blockScannerThread.interrupt();
return; return;
@ -249,7 +253,7 @@ public synchronized void removeBlockPool(String blockPoolId) {
LOG.info("Removed bpid="+blockPoolId+" from blockPoolScannerMap"); LOG.info("Removed bpid="+blockPoolId+" from blockPoolScannerMap");
} }
// This method is used for testing @VisibleForTesting
long getBlocksScannedInLastRun(String bpid) throws IOException { long getBlocksScannedInLastRun(String bpid) throws IOException {
BlockPoolSliceScanner bpScanner = getBPScanner(bpid); BlockPoolSliceScanner bpScanner = getBPScanner(bpid);
if (bpScanner == null) { if (bpScanner == null) {
@ -259,6 +263,16 @@ long getBlocksScannedInLastRun(String bpid) throws IOException {
} }
} }
/**
 * Looks up the scanner registered for a block pool and reports its total
 * scan count.
 *
 * @param bpid id of the block pool to query
 * @return the value of {@code getTotalScans()} on that pool's scanner
 * @throws IOException if no scanner is found for {@code bpid}
 */
@VisibleForTesting
long getTotalScans(String bpid) throws IOException {
  final BlockPoolSliceScanner sliceScanner = getBPScanner(bpid);
  if (sliceScanner != null) {
    return sliceScanner.getTotalScans();
  }
  // No scanner for this pool: report it the same way as the other test hooks.
  throw new IOException("Block Pool: "+bpid+" is not running");
}
public void start() { public void start() {
blockScannerThread = new Thread(this); blockScannerThread = new Thread(this);
blockScannerThread.setDaemon(true); blockScannerThread.setDaemon(true);

View File

@ -20,8 +20,11 @@
import java.io.IOException; import java.io.IOException;
import junit.framework.Assert;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
@ -31,7 +34,13 @@
import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.server.datanode.BlockPoolSliceScanner;
import static org.apache.hadoop.hdfs.server.datanode.DataBlockScanner.SLEEP_PERIOD_MS;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.junit.Test; import org.junit.Test;
import org.junit.Ignore;
import static org.junit.Assert.fail;
public class TestMultipleNNDataBlockScanner { public class TestMultipleNNDataBlockScanner {
@ -166,4 +175,75 @@ public void testBlockScannerAfterRestart() throws IOException,
cluster.shutdown(); cluster.shutdown();
} }
} }
/**
 * Federated variant of the rescan-interval check: creates one small file
 * through name node 1 and one through name node 0, then repeatedly verifies
 * via {@code waitAndScanBlocks(1, 1)} that the scan counters stay at one.
 *
 * NOTE(review): the method name says "2NN" but the topology is built with
 * an argument of 3 -- confirm which is intended.
 *
 * @throws IOException if cluster setup or file creation fails
 */
@Test
public void test2NNBlockRescanInterval() throws IOException {
  // Turn scanner logging all the way up so the skip/scan decisions are visible.
  Logger scannerLogger = ((Log4JLogger) BlockPoolSliceScanner.LOG).getLogger();
  scannerLogger.setLevel(Level.ALL);

  Configuration conf = new HdfsConfiguration();
  MiniDFSNNTopology topology = MiniDFSNNTopology.simpleFederatedTopology(3);
  cluster = new MiniDFSCluster.Builder(conf).nnTopology(topology).build();

  try {
    // Same path in two different namespaces; name node 1 is written first.
    FileSystem secondFs = cluster.getFileSystem(1);
    Path file2 = new Path("/test/testBlockScanInterval");
    DFSTestUtil.createFile(secondFs, file2, 30, (short) 1, 0);

    FileSystem firstFs = cluster.getFileSystem(0);
    Path file1 = new Path("/test/testBlockScanInterval");
    DFSTestUtil.createFile(firstFs, file1, 30, (short) 1, 0);

    int roundsLeft = 8;
    while (roundsLeft-- > 0) {
      LOG.info("Verifying that the blockscanner scans exactly once");
      waitAndScanBlocks(1, 1);
    }
  } finally {
    cluster.shutdown();
  }
}
/**
 * HDFS-3828: DN rescans blocks too frequently.
 *
 * Starts a default mini cluster, writes one small file, and then checks
 * four times in a row (via {@code waitAndScanBlocks(1, 1)}) that the
 * scan counters remain at one.
 *
 * @throws IOException if cluster setup or file creation fails
 */
@Test
public void testBlockRescanInterval() throws IOException {
  // Turn scanner logging all the way up so the skip/scan decisions are visible.
  Logger scannerLogger = ((Log4JLogger) BlockPoolSliceScanner.LOG).getLogger();
  scannerLogger.setLevel(Level.ALL);

  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
  cluster = builder.build();

  try {
    FileSystem dfs = cluster.getFileSystem();
    Path file1 = new Path("/test/testBlockScanInterval");
    DFSTestUtil.createFile(dfs, file1, 30, (short) 1, 0);

    int roundsLeft = 4;
    while (roundsLeft-- > 0) {
      LOG.info("Verifying that the blockscanner scans exactly once");
      waitAndScanBlocks(1, 1);
    }
  } finally {
    cluster.shutdown();
  }
}
/**
 * Polls the first data node's block scanner until its counters match the
 * expected values or the retries run out, then asserts on the last values
 * read.
 *
 * Each poll is preceded by a sleep of {@code SLEEP_PERIOD_MS}. The counters
 * are read for the block pool of name system 0 only. At most six polls are
 * made: the first one plus five retries.
 *
 * @param scansLastRun expected result of {@code getBlocksScannedInLastRun}
 * @param scansTotal expected result of {@code getTotalScans}
 * @throws IOException if the scanner for the block pool cannot be queried
 */
void waitAndScanBlocks(long scansLastRun, long scansTotal)
    throws IOException {
  final String bpid = cluster.getNamesystem(0).getBlockPoolId();
  final DataNode dn = cluster.getDataNodes().get(0);

  int retriesLeft = 5;
  long blocksScanned;
  long total;
  while (true) {
    // Wait one scanner sleep period before sampling the counters.
    try {
      Thread.sleep(SLEEP_PERIOD_MS);
    } catch (InterruptedException e) {
      fail("Interrupted: " + e);
    }

    blocksScanned = dn.blockScanner.getBlocksScannedInLastRun(bpid);
    total = dn.blockScanner.getTotalScans(bpid);
    LOG.info("bpid = " + bpid + " blocksScanned = " + blocksScanned + " total=" + total);

    // The retry counter is consumed first, then the match test is evaluated.
    if (retriesLeft-- <= 0
        || (blocksScanned == scansLastRun && scansTotal == total)) {
      break;
    }
  }

  Assert.assertEquals(scansTotal, total);
  Assert.assertEquals(scansLastRun, blocksScanned);
}
} }