HDFS-13994. Improve DataNode BlockSender waitForMinLength. Contributed by BELUGA BEHR.

commit 8b64fbab1a (parent 9aebafd2da)
Author: Inigo Goiri
Date:   2018-10-19 10:36:03 -07:00

5 changed files with 88 additions and 34 deletions
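
In short: BlockSender used to wait for an RBW replica (one still being written) to reach the required length by polling getBytesOnDisk() every 100 ms for up to 3 seconds. This change replaces that private polling helper with a waitForMinLength(long, long, TimeUnit) method on the ReplicaInPipeline interface; LocalReplicaInPipeline implements it with a ReentrantLock and Condition, so the writer's setLastChecksumAndDataLen() call wakes waiting readers as soon as bytesOnDisk grows, rather than after the next sleep interval.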

BlockSender.java

@@ -30,6 +30,7 @@
 import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
 import java.util.Arrays;
+import java.util.concurrent.TimeUnit;

 import org.apache.commons.logging.Log;
 import org.apache.hadoop.fs.ChecksumException;
@@ -258,7 +259,7 @@ class BlockSender implements java.io.Closeable {
       }
       if (replica.getState() == ReplicaState.RBW) {
         final ReplicaInPipeline rbw = (ReplicaInPipeline) replica;
-        waitForMinLength(rbw, startOffset + length);
+        rbw.waitForMinLength(startOffset + length, 3, TimeUnit.SECONDS);
         chunkChecksum = rbw.getLastChecksumAndDataLen();
       }
       if (replica instanceof FinalizedReplica) {
@@ -494,30 +495,6 @@ private static Replica getReplica(ExtendedBlock block, DataNode datanode)
     return replica;
   }
-
-  /**
-   * Wait for rbw replica to reach the length
-   * @param rbw replica that is being written to
-   * @param len minimum length to reach
-   * @throws IOException on failing to reach the len in given wait time
-   */
-  private static void waitForMinLength(ReplicaInPipeline rbw, long len)
-      throws IOException {
-    // Wait for 3 seconds for rbw replica to reach the minimum length
-    for (int i = 0; i < 30 && rbw.getBytesOnDisk() < len; i++) {
-      try {
-        Thread.sleep(100);
-      } catch (InterruptedException ie) {
-        throw new IOException(ie);
-      }
-    }
-    long bytesOnDisk = rbw.getBytesOnDisk();
-    if (bytesOnDisk < len) {
-      throw new IOException(
-          String.format("Need %d bytes, but only %d bytes available", len,
-              bytesOnDisk));
-    }
-  }

   /**
    * Converts an IOExcpetion (not subclasses) to SocketException.
    * This is typically done to indicate to upper layers that the error
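
Note that the 3-second bound formerly hard-coded in the removed helper (30 iterations of a 100 ms sleep) now appears explicitly at the call site as (3, TimeUnit.SECONDS): the caller, not a private utility, decides how long a reader may wait.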

LocalReplicaInPipeline.java

@@ -22,7 +22,11 @@
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.RandomAccessFile;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.concurrent.locks.Condition;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;

 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
@@ -43,6 +47,10 @@
  */
 public class LocalReplicaInPipeline extends LocalReplica
     implements ReplicaInPipeline {
+  private final Lock lock = new ReentrantLock();
+  private final Condition bytesOnDiskChange = lock.newCondition();
+
   private long bytesAcked;
   private long bytesOnDisk;
   private byte[] lastChecksum;
@@ -167,16 +175,47 @@ public void releaseAllBytesReserved() {
     bytesReserved = 0;
   }

-  @Override // ReplicaInPipeline
-  public synchronized void setLastChecksumAndDataLen(long dataLength,
-      byte[] checksum) {
-    this.bytesOnDisk = dataLength;
-    this.lastChecksum = checksum;
+  @Override
+  public void setLastChecksumAndDataLen(long dataLength, byte[] checksum) {
+    lock.lock();
+    try {
+      this.bytesOnDisk = dataLength;
+      this.lastChecksum = checksum;
+      bytesOnDiskChange.signalAll();
+    } finally {
+      lock.unlock();
+    }
   }

-  @Override // ReplicaInPipeline
-  public synchronized ChunkChecksum getLastChecksumAndDataLen() {
-    return new ChunkChecksum(getBytesOnDisk(), lastChecksum);
+  @Override
+  public ChunkChecksum getLastChecksumAndDataLen() {
+    lock.lock();
+    try {
+      return new ChunkChecksum(getBytesOnDisk(), lastChecksum);
+    } finally {
+      lock.unlock();
+    }
+  }
+
+  @Override
+  public void waitForMinLength(long minLength, long time, TimeUnit unit)
+      throws IOException {
+    long nanos = unit.toNanos(time);
+    lock.lock();
+    try {
+      while (bytesOnDisk < minLength) {
+        if (nanos <= 0L) {
+          throw new IOException(
+              String.format("Need %d bytes, but only %d bytes available",
+                  minLength, bytesOnDisk));
+        }
+        nanos = bytesOnDiskChange.awaitNanos(nanos);
+      }
+    } catch (InterruptedException e) {
+      throw new IOException(e);
+    } finally {
+      lock.unlock();
+    }
   }

   @Override // ReplicaInPipeline
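
Two details of the implementation above are worth spelling out: the while loop re-checks the predicate because Condition waits may wake spuriously, and awaitNanos() returns the remaining budget, so repeated waits never exceed the original timeout. A minimal, self-contained sketch of the same pattern (demo-only code; the class TimedWaitDemo and its names are invented for illustration, not part of the patch):

import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

public class TimedWaitDemo {
  private final Lock lock = new ReentrantLock();
  private final Condition grown = lock.newCondition();
  private long bytesOnDisk;

  // Writer side: advance the length and wake every waiting reader.
  void advance(long newLength) {
    lock.lock();
    try {
      bytesOnDisk = newLength;
      grown.signalAll();
    } finally {
      lock.unlock();
    }
  }

  // Reader side: block until the length is reached or the budget runs out.
  void waitForMinLength(long minLength, long time, TimeUnit unit)
      throws InterruptedException {
    long nanos = unit.toNanos(time);
    lock.lock();
    try {
      // Re-check the predicate on every wakeup; awaitNanos returns the
      // remaining budget, keeping the total wait bounded.
      while (bytesOnDisk < minLength) {
        if (nanos <= 0L) {
          throw new IllegalStateException(
              "Need " + minLength + " bytes, have " + bytesOnDisk);
        }
        nanos = grown.awaitNanos(nanos);
      }
    } finally {
      lock.unlock();
    }
  }

  public static void main(String[] args) throws Exception {
    TimedWaitDemo demo = new TimedWaitDemo();
    new Thread(() -> {
      try {
        Thread.sleep(200L);      // simulate a slow writer
      } catch (InterruptedException ignored) {
      }
      demo.advance(1024L);
    }).start();
    // Returns after roughly 200 ms, well inside the 3-second budget.
    demo.waitForMinLength(512L, 3, TimeUnit.SECONDS);
    System.out.println("reached minimum length");
  }
}

The condition-based wait returns as soon as advance() signals, whereas a 100 ms polling loop could add up to one full sleep interval of latency.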

ReplicaInPipeline.java

@@ -19,6 +19,7 @@
 import java.io.IOException;
 import java.io.OutputStream;
+import java.util.concurrent.TimeUnit;

 import org.apache.hadoop.hdfs.server.datanode.fsdataset.ReplicaOutputStreams;
 import org.apache.hadoop.util.DataChecksum;
@@ -104,4 +105,17 @@ public ReplicaOutputStreams createStreams(boolean isCreate,
   * @throws IOException the waiting is interrupted
   */
  void stopWriter(long xceiverStopTimeout) throws IOException;
+
+  /**
+   * Causes the current thread to wait until a minimum length is reached, the
+   * thread is interrupted, or the specified waiting time elapses.
+   *
+   * @param minLength The minimum length to achieve
+   * @param time the maximum time to wait
+   * @param unit the time unit of the time argument
+   * @throws IOException if the current thread is interrupted or the minimum
+   *           length is not achieved within the time allowed.
+   */
+  void waitForMinLength(long minLength, long time, TimeUnit unit)
+      throws IOException;
 }
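
The contract mirrors Condition.await(long, TimeUnit), but surfaces both interruption and timeout as IOException. A hypothetical caller sketch (the variable name replica and the sizes are invented; any ReplicaInPipeline implementation applies):

try {
  replica.waitForMinLength(4096L, 500, TimeUnit.MILLISECONDS);
  // At least 4096 bytes are now on disk and safe to read.
} catch (IOException e) {
  // Either interrupted, or fewer than 4096 bytes arrived within 500 ms.
}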

SimulatedFSDataset.java

@@ -33,6 +33,7 @@
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;

 import javax.management.NotCompliantMBeanException;
 import javax.management.ObjectName;
@@ -396,6 +397,23 @@ public boolean attemptToSetWriter(Thread prevWriter, Thread newWriter) {
     @Override
     public void stopWriter(long xceiverStopTimeout) throws IOException {
     }
+
+    @Override
+    public void waitForMinLength(long minLength, long time, TimeUnit unit)
+        throws IOException {
+      final long deadLine = System.currentTimeMillis() + unit.toMillis(time);
+      do {
+        if (getBytesOnDisk() >= minLength) {
+          return;
+        }
+        try {
+          Thread.sleep(100L);
+        } catch (InterruptedException e) {
+          throw new IOException(e);
+        }
+      } while (deadLine > System.currentTimeMillis());
+      throw new IOException("Minimum length was not achieved within timeout");
+    }
   }

   /**
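
This test double keeps the old deadline-polling shape: it checks the length up front, then sleeps in 100 ms steps until the deadline passes, so it can overshoot the allotted time by up to one sleep interval. Presumably polling was kept here because the simulated replica has no lock or condition for a writer to signal.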

ExternalReplicaInPipeline.java

@@ -20,6 +20,7 @@
 import java.io.IOException;
 import java.io.OutputStream;
+import java.util.concurrent.TimeUnit;

 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
 import org.apache.hadoop.hdfs.server.datanode.ChunkChecksum;
@@ -129,4 +130,9 @@ public boolean attemptToSetWriter(Thread prevWriter, Thread newWriter) {
   @Override
   public void interruptThread() {
   }
+
+  @Override
+  public void waitForMinLength(long minLength, long time, TimeUnit unit)
+      throws IOException {
+  }
 }